2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "exec/DynamicShapeInferer.h"
18 #include "util/ShapeInference.h"
26 void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
27 const ir::OperandIndex lhs_idx,
28 const ir::OperandIndex rhs_idx)
30 auto lhs = _tensor_registry->getITensor(lhs_idx);
31 auto lhs_shape = lhs->getShape();
33 auto rhs = _tensor_registry->getITensor(rhs_idx);
34 auto rhs_shape = rhs->getShape();
37 Here, the state after compilation (satic shape inference) could be one of the following:
39 lhs rhs output execution-time shape inf required
40 ------------------------------------------ ---------------------------------
41 case 1) static static static X
42 case 2) one or both are dynamic dynamic O
44 Then nnfw_apply_tensorinf() could change one or both inputs dynamic.
45 So, in this method, we have one more state and we have to re-calculate shape for this shape.
47 case 3) one or both are dynamic static O
49 So, only when all inputs are static, we can skip dynamic shape inference.
51 if ((!lhs->is_dynamic()) && (!rhs->is_dynamic()))
54 auto output_idx = op.getOutputs().at(0);
55 auto output = _tensor_registry->getITensor(output_idx);
57 ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
59 output->applyShape(new_shape);
60 assert(output->buffer() != nullptr);
63 void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
64 const ir::OperandIndex input_ind)
66 // check if input is not dynamic
67 auto input = _tensor_registry->getITensor(input_ind);
68 auto output_shape = input->getShape();
71 Here, the state after compilation (satic shape inference) could be one of the following:
73 input output execution-time shape inf required
74 ------------------------- ---------------------------------
75 case 1) static static X
76 case 2) dynamic dynamic O
78 Then nnfw_apply_tensorinf() could change input dynamic.
79 So, in this method, we have one more state and we have to re-calculate shape for this shape.
81 case 3) dynamic static O
83 So, only when input is static, we can skip dynamic shape inference.
85 if (!input->is_dynamic())
88 auto output_ind = op.getOutputs().at(0);
89 auto output = _tensor_registry->getITensor(output_ind);
91 output->applyShape(output_shape);
92 assert(output->buffer() != nullptr);
95 void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
97 const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
98 const auto input = _tensor_registry->getITensor(input_idx);
100 const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
101 const auto axis = _tensor_registry->getITensor(axis_idx);
103 auto output_ind = op.getOutputs().at(0);
104 auto output = _tensor_registry->getITensor(output_ind);
106 if (!input->is_dynamic() && !output->is_dynamic())
109 auto input_shape = input->getShape();
110 auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
111 const auto rank = input_shape.rank();
112 axis_value = axis_value < 0 ? axis_value + rank : axis_value;
114 ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank);
116 output->applyShape(new_shape);
117 assert(output->buffer() != nullptr);
120 void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
122 const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
123 const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
124 auto lhs = _tensor_registry->getITensor(lhs_index);
125 auto rhs = _tensor_registry->getITensor(rhs_index);
127 if (!lhs->is_dynamic() && !rhs->is_dynamic())
130 const auto output_index = op.getOutputs().at(0);
131 auto output = _tensor_registry->getITensor(output_index);
133 auto lhs_shape = lhs->getShape();
134 auto rhs_shape = rhs->getShape();
137 auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
138 output->applyShape(new_shape);
141 void DynamicShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
143 const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
144 const auto &input = _tensor_registry->getITensor(input_idx);
146 const auto cluster_idx{
147 op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
148 const auto &cluster = _tensor_registry->getITensor(cluster_idx);
149 assert(cluster->is_constant());
151 if (!input->is_dynamic())
154 auto input_shape = input->getShape();
155 auto cluster_shape = cluster->getShape();
157 auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
160 ir::Shape new_shape =
161 shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
163 auto output_ind = op.getOutputs().at(0);
164 auto output = _tensor_registry->getITensor(output_ind);
166 output->applyShape(new_shape);
167 assert(output->buffer() != nullptr);
170 void DynamicShapeInferer::visit(const ir::operation::BCQGather &op)
172 const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
173 const auto &indices = _tensor_registry->getITensor(indices_idx);
175 const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
176 const auto &input_binary = _tensor_registry->getITensor(input_binary_idx);
178 const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
179 const auto &cluster = _tensor_registry->getITensor(cluster_idx);
180 assert(cluster->is_constant());
182 if (!indices->is_dynamic())
185 auto indices_shape = indices->getShape();
186 auto cluster_shape = cluster->getShape();
187 auto rank = input_binary->getShape().rank();
189 auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
192 ir::Shape new_shape = shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
193 cluster_buf, rank, op.param());
195 auto output_ind = op.getOutputs().at(0);
196 auto output = _tensor_registry->getITensor(output_ind);
198 output->applyShape(new_shape);
199 assert(output->buffer() != nullptr);
202 void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
204 handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
205 op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
208 void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
210 auto output_ind = op.getOutputs().at(0);
211 auto output = _tensor_registry->getITensor(output_ind);
213 auto input_idx = op.getInputs().at(ir::operation::BroadcastTo::INPUT);
214 auto input = _tensor_registry->getITensor(input_idx);
216 if ((!input->is_dynamic()) && (!output->is_dynamic()))
219 auto shape_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
220 const auto &shape = _tensor_registry->getITensor(shape_idx);
222 assert(shape); // It shouldn't be 0.
224 auto output_shape = shape_inference::inferBroadcastToShape(
225 shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
227 // set output shape and output buffer
228 output->applyShape(output_shape);
229 assert(output->buffer() != nullptr);
232 void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
234 handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
235 op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
238 void DynamicShapeInferer::visit(const ir::operation::Concat &op)
241 The state after compilation (satic shape inference) could be one of the following:
243 inputs output execution-time shape inf required
244 ------------------------------------------ ---------------------------------
245 case 1) all static static X
246 case 2) at least on is dynamic dynamic O
248 Then nnfw_apply_tensorinf() could change one or both inputs dynamic.
249 So, in this method, we have one more state and we have to re-calculate shape for this shape.
251 case 3) at least on is dynamic static O
253 So, only when all inputs are static, we can skip dynamic shape inference.
255 bool all_static = true;
256 for (auto input_ind : op.getInputs())
258 auto input = _tensor_registry->getITensor(input_ind);
259 if (input->is_dynamic())
271 auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
273 if (input1->num_dimensions() != input2->num_dimensions())
276 for (size_t i = 0; i < input1->num_dimensions(); i++)
278 auto positive_axis = (axis >= 0) ? axis : axis + input1->num_dimensions();
280 if (i != positive_axis)
281 if (input1->dimension(i) != input2->dimension(i))
288 auto first_input_ind = op.getInputs().at(0);
289 auto first_input = _tensor_registry->getITensor(first_input_ind);
291 for (auto input_ind : op.getInputs())
293 auto input = _tensor_registry->getITensor(input_ind);
294 if (input != first_input && !isConcatible(first_input, input, op.param().axis))
295 throw std::runtime_error("input shapes does not matched for concat");
299 // getting output shape
300 onert::shape_inference::Shapes in_shapes;
301 for (auto input_ind : op.getInputs())
303 auto input = _tensor_registry->getITensor(input_ind);
304 ir::Shape shape = input->getShape();
306 in_shapes.emplace_back(shape);
309 auto output_ind = op.getOutputs().at(0);
310 auto output = _tensor_registry->getITensor(output_ind);
311 auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
313 output->applyShape(output_shape);
316 void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
318 // check if input is not dynamic
319 auto input_ind = op.getInputs().at(ir::operation::Conv2D::INPUT);
320 auto input = _tensor_registry->getITensor(input_ind);
322 auto ker_ind = op.getInputs().at(ir::operation::Conv2D::KERNEL);
323 auto ker = _tensor_registry->getITensor(ker_ind);
325 if ((!input->is_dynamic()) && (!ker->is_dynamic()))
328 ir::Shape input_shape = input->getShape();
329 ir::Shape ker_shape = ker->getShape();
331 auto output_ind = op.getOutputs().at(0);
332 auto output = _tensor_registry->getITensor(output_ind);
334 ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
336 output->applyShape(output_shape);
337 assert(output->buffer() != nullptr);
340 void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
342 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
345 void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
347 handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
348 op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
351 void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
353 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
356 void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
358 // check if input is not dynamic
359 auto input_ind = op.getInputs().at(ir::operation::ExpandDims::INPUT);
360 auto input = _tensor_registry->getITensor(input_ind);
362 // check if output is not dynamic, meaning when 1st input is static and 2nd input is const
363 auto output_ind = op.getOutputs().at(0);
364 auto output = _tensor_registry->getITensor(output_ind);
367 Here, the state after compilation (satic shape inference) could be one of the following:
369 input1 input2 output execution-time shape inf required
370 ----------------------------- --------------------------------
371 case 1) static const static X
372 case 2) static placeholder dynamic O
373 case 3) dynamic const dynamic O
374 case 4) dynamic placeholder dynamic O
376 Then nnfw_apply_tensorinf() could change input dynamic.
377 So, in this method, we could have one more state and we have to re-calculate shape
380 case 5) dynamic const static O
382 So, only when input1 and ouput are static, we can skip dynamic shape inference.
384 if ((!input->is_dynamic()) && (!output->is_dynamic()))
387 ir::Shape input_shape = input->getShape();
389 auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
390 auto axis = _tensor_registry->getITensor(axis_ind);
391 auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer());
394 auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
396 output->applyShape(output_shape);
397 assert(output->buffer() != nullptr);
400 void DynamicShapeInferer::visit(const ir::operation::Fill &op)
402 // check if output is not dynamic
403 auto output_ind = op.getOutputs().at(0);
404 auto output = _tensor_registry->getITensor(output_ind);
405 auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT);
406 auto input = _tensor_registry->getITensor(input_ind);
407 ir::Shape input_shape = input->getShape();
409 if ((!input->is_dynamic()) && (!output->is_dynamic()))
412 assert(input->data_type() == ir::DataType::INT32);
414 auto input_buf = reinterpret_cast<const int32_t *>(input->buffer());
417 auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
419 output->applyShape(output_shape);
420 assert(output->buffer() != nullptr);
423 void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op)
425 const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
426 const auto &input = _tensor_registry->getITensor(input_idx);
428 const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
429 const auto &ker = _tensor_registry->getITensor(ker_idx);
431 if (!input->is_dynamic() && !ker->is_dynamic())
434 auto input_shape = input->getShape();
435 auto ker_shape = ker->getShape();
437 ir::Shape new_shape = shape_inference::inferFullyConnectedShape(input_shape, ker_shape);
439 auto output_ind = op.getOutputs().at(0);
440 auto output = _tensor_registry->getITensor(output_ind);
442 output->applyShape(new_shape);
443 assert(output->buffer() != nullptr);
446 void DynamicShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
448 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
451 void DynamicShapeInferer::visit(const ir::operation::Gather &op)
453 const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
454 const auto &input = _tensor_registry->getITensor(input_idx);
455 auto input_shape = input->getShape();
457 const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
458 const auto &indices = _tensor_registry->getITensor(indices_idx);
459 auto indices_shape = indices->getShape();
461 if (!(input->is_dynamic()) && !(indices->is_dynamic()))
464 const auto rank = input_shape.rank();
465 const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
467 assert(0 <= axis && axis < rank);
469 ir::Shape new_shape = shape_inference::inferGatherShape(input_shape, indices_shape, axis, rank);
471 auto output_ind = op.getOutputs().at(0);
472 auto output = _tensor_registry->getITensor(output_ind);
474 output->applyShape(new_shape);
475 assert(output->buffer() != nullptr);
478 void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
480 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
483 void DynamicShapeInferer::visit(const ir::operation::LSTM &op)
485 const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
486 auto output = _tensor_registry->getITensor(output_index);
488 const auto output_state_out_index{
489 op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
491 const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
493 const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
495 if (!output->is_dynamic() &&
496 !(_tensor_registry->getITensor(output_state_out_index) != nullptr &&
497 _tensor_registry->getITensor(output_state_out_index)->is_dynamic()) &&
498 !(_tensor_registry->getITensor(cell_state_out_index) != nullptr &&
499 _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()) &&
500 !(_tensor_registry->getITensor(scratch_buffer_index) != nullptr &&
501 _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()))
504 const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
505 const auto input = _tensor_registry->getITensor(input_index);
506 const auto input_shape = input->getShape();
508 const auto input_to_output_weights_index{
509 op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
510 const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
511 const auto input_to_output_weights_shape = input_to_output_weights->getShape();
513 const auto recurrent_to_output_weights_index{
514 op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
515 const auto recurrent_to_output_weights =
516 _tensor_registry->getITensor(recurrent_to_output_weights_index);
517 const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
521 (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
522 const int n_cell = input_to_output_weights_shape.dim(0);
523 const int n_output = recurrent_to_output_weights_shape.dim(1);
524 if (input_shape.rank() == 3)
526 if (op.param().time_major)
527 output->applyShape(ir::Shape{input_shape.dim(0), n_batch, n_output});
529 output->applyShape(ir::Shape{n_batch, input_shape.dim(1), n_output});
533 assert(input_shape.rank() == 2);
534 output->applyShape(ir::Shape{n_batch, n_output});
536 assert(output->buffer() != nullptr);
538 auto output_state_out = _tensor_registry->getITensor(output_state_out_index);
539 if (output_state_out != nullptr)
541 output_state_out->applyShape(ir::Shape{n_batch, n_output});
542 assert(output_state_out->buffer() != nullptr);
545 auto cell_state_out = _tensor_registry->getITensor(cell_state_out_index);
546 if (cell_state_out != nullptr)
548 cell_state_out->applyShape(ir::Shape{n_batch, n_cell});
549 assert(cell_state_out->buffer() != nullptr);
552 auto scratch_buffer = _tensor_registry->getITensor(scratch_buffer_index);
553 if (scratch_buffer != nullptr)
555 const auto input_to_input_weights_index{
556 op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
557 const auto recurrent_to_input_weights_index{
558 op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
560 const auto input_to_input_weights_shape =
561 _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
562 bool has_input_to_input_weights =
563 input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
565 const auto recurrent_to_input_weights_shape =
566 _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
567 bool has_recurrent_to_input_weights = recurrent_to_input_weights_shape.dim(0) != 0 &&
568 recurrent_to_input_weights_shape.dim(1) != 0;
570 // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
573 bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
576 scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 4});
580 scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 3});
582 assert(scratch_buffer->buffer() != nullptr);
586 void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
588 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
591 void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
593 auto output_ind = op.getOutputs().at(0);
594 auto output = _tensor_registry->getITensor(output_ind);
596 auto indices_ind = op.getInputs().at(ir::operation::OneHot::INDICES);
597 const auto &indices = _tensor_registry->getITensor(indices_ind);
598 auto indices_shape = indices->getShape();
600 auto depth_ind = op.getInputs().at(ir::operation::OneHot::DEPTH);
601 const auto &depth = _tensor_registry->getITensor(depth_ind);
603 if (!indices->is_dynamic() && !depth->is_dynamic())
608 int32_t *depth_buf = reinterpret_cast<int32_t *>(depth->buffer());
610 const auto axis_val = op.param().axis;
612 ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
613 output->applyShape(new_shape);
614 assert(output->buffer() != nullptr);
617 void DynamicShapeInferer::visit(const ir::operation::Pack &op)
619 bool is_any_of_inputs_dynamic = [&]() -> bool {
620 for (uint32_t i = 0; i < op.getInputs().size(); ++i)
622 const auto &input = _tensor_registry->getITensor(op.getInputs().at(i));
623 if (input->is_dynamic())
631 const auto input_idx{op.getInputs().at(0)};
632 const auto &input = _tensor_registry->getITensor(input_idx);
633 auto input_shape = input->getShape();
635 auto output_ind = op.getOutputs().at(0);
636 auto output = _tensor_registry->getITensor(output_ind);
638 if (!is_any_of_inputs_dynamic && !output->is_dynamic())
641 const auto rank = input_shape.rank() + 1;
642 const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
643 const auto num = op.param().num;
645 assert(0 <= axis && axis < rank);
647 ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
649 output->applyShape(new_shape);
650 assert(output->buffer() != nullptr);
653 void DynamicShapeInferer::visit(const ir::operation::Pad &op)
655 // check if output is not dynamic
656 auto output_ind = op.getOutputs().at(0);
657 auto output = _tensor_registry->getITensor(output_ind);
659 auto input_ind = op.getInputs().at(ir::operation::Pad::Input::INPUT);
660 auto input = _tensor_registry->getITensor(input_ind);
662 auto pad_ind = op.getInputs().at(ir::operation::Pad::Input::PAD);
663 auto pad = _tensor_registry->getITensor(pad_ind);
665 // check if input and output are not dynamic
666 if ((!input->is_dynamic()) && (!output->is_dynamic()))
669 int32_t *pad_buf = reinterpret_cast<int32_t *>(pad->buffer());
673 shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
675 // change output shape and reallocate output tensor memory
676 output->applyShape(output_shape);
677 assert(output->buffer() != nullptr);
680 void DynamicShapeInferer::visit(const ir::operation::Permute & /* op */)
682 // NOTE Permute is a special operation which does not do shape inference before the actual
683 // function(kernel) execution. Shape inference and output allocation will be done in the kernel
684 // on-the-fly, as it must support inter-backend inference/allocation.
687 void DynamicShapeInferer::visit(const ir::operation::Pow &op)
689 handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
690 op.getInputs().at(ir::operation::Pow::Input::RHS));
693 void DynamicShapeInferer::visit(const ir::operation::Range &op)
695 // check if output is not dynamic
696 auto output_ind = op.getOutputs().at(0);
697 auto output = _tensor_registry->getITensor(output_ind);
699 // from op, access the buffer of second input to read new shape
700 auto start_idx = op.getInputs().at(ir::operation::Range::Input::START);
701 auto start_tensor = _tensor_registry->getITensor(start_idx);
703 auto limit_idx = op.getInputs().at(ir::operation::Range::Input::LIMIT);
704 auto limit_tensor = _tensor_registry->getITensor(limit_idx);
706 auto delta_idx = op.getInputs().at(ir::operation::Range::Input::DELTA);
707 auto delta_tensor = _tensor_registry->getITensor(delta_idx);
709 if (!start_tensor->is_dynamic() && !limit_tensor->is_dynamic() && !delta_tensor->is_dynamic() &&
710 !output->is_dynamic())
714 if (output->data_type() == ir::DataType::FLOAT32)
717 shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
718 *reinterpret_cast<float *>(limit_tensor->buffer()),
719 *reinterpret_cast<float *>(delta_tensor->buffer()));
721 else if (output->data_type() == ir::DataType::INT32)
723 new_shape = shape_inference::inferRangeShape<int32_t>(
724 *reinterpret_cast<int32_t *>(start_tensor->buffer()),
725 *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
726 *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
728 output->applyShape(new_shape);
729 assert(output->buffer() != nullptr);
732 void DynamicShapeInferer::visit(const ir::operation::Reduce &op)
734 const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
735 const auto &input = _tensor_registry->getITensor(input_idx);
736 auto input_shape = input->getShape();
738 const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
739 const auto &axes = _tensor_registry->getITensor(axes_idx);
741 if (!input->is_dynamic())
744 std::vector<int32_t> axes_vec;
745 for (uint32_t i = 0; i < axes->getShape().num_elements(); ++i)
747 const auto buffer = axes->buffer() + axes->calcOffset({i});
748 switch (axes->data_type())
750 case ir::DataType::INT32:
752 axes_vec.emplace_back(*reinterpret_cast<const int32_t *>(buffer));
755 case ir::DataType::INT64:
757 axes_vec.emplace_back(*reinterpret_cast<const int64_t *>(buffer));
761 throw std::runtime_error("DynamicShapeInferer " + op.name() + ": Not supported data type");
765 const auto keep_dims = op.param().keep_dims;
767 auto output_ind = op.getOutputs().at(0);
768 auto output = _tensor_registry->getITensor(output_ind);
770 ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
772 output->applyShape(new_shape);
773 assert(output->buffer() != nullptr);
776 void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
778 // check if output is not dynamic
779 auto output_ind = op.getOutputs().at(0);
780 auto output = _tensor_registry->getITensor(output_ind);
782 auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
783 auto input = _tensor_registry->getITensor(input_ind);
786 Here, the state after compilation (satic shape inference) could be one of the following:
788 input1 input2 (or option) output execution-time shape inf required
789 ------------------------------------ --------------------------------
790 case 1) static const static X
791 case 2) static placeholder dynamic O
792 case 3) dynamic const dynamic O
793 case 4) dynamic placeholder dynamic O
795 Then nnfw_apply_tensorinf() could change input dynamic.
796 So, in this method, we could have one more state and we have to re-calculate shape
799 case 5) dynamic const static O
801 So, only when both input1 and ouput are static, we can skip dynamic shape inference.
803 if ((!input->is_dynamic()) && (!output->is_dynamic()))
806 // New shape is given by second input tensor
807 if (op.getInputs().size() == 2)
809 // from op, access the buffer of second input to read new shape
810 auto new_shape_ind = op.getInputs().at(ir::operation::Reshape::Input::SHAPE);
812 // getting output shape by reading new_shape tensor buffer
813 auto new_shape = _tensor_registry->getITensor(new_shape_ind);
816 int32_t *new_shape_buf = reinterpret_cast<int32_t *>(new_shape->buffer());
817 assert(new_shape_buf);
819 auto output_shape = shape_inference::inferReshapeShape(
820 new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
822 // if shape is changed, change output shape and reallocate output tensor memory
823 if (output_shape != output->getShape() || output->buffer() == nullptr)
825 // change on output shape
826 output->applyShape(output_shape);
828 assert(output->buffer() != nullptr);
830 // New shape is given by option
831 else if (op.param().new_shape.size() != 0)
833 // Let's check the new_shape option
834 auto shape = op.param().new_shape;
835 auto output_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
836 input->getShape().num_elements());
838 // if shape is changed, change output shape and reallocate output tensor memory
839 if (output_shape != output->getShape() || output->buffer() == nullptr)
841 // change on output shape
842 output->applyShape(output_shape);
844 assert(output->buffer() != nullptr);
848 throw std::runtime_error("Reshape: new shape is missing");
853 void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
855 // check if output is not dynamic
856 auto output_ind = op.getOutputs().at(0);
857 auto output = _tensor_registry->getITensor(output_ind);
859 auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
860 auto input = _tensor_registry->getITensor(input_ind);
862 if ((!input->is_dynamic()) && (!output->is_dynamic()))
865 // getting output shape from input shape and Params
866 int32_t height_out, width_out;
867 if (op.getInputs().size() == 2)
869 auto size_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE);
870 auto size = _tensor_registry->getITensor(size_ind);
871 if (size->data_type() == ir::DataType::INT32)
873 auto size_buf = reinterpret_cast<const int32_t *>(size->buffer());
874 height_out = size_buf[0];
875 width_out = size_buf[1];
879 throw std::runtime_error("DynamicShapeInferer ResizeBilinear : Unsupported data type");
884 height_out = op.param().height_out;
885 width_out = op.param().width_out;
888 shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
890 // if shape is changed, change output shape and reallocate output tensor memory
891 if (output_shape != output->getShape() || output->buffer() == nullptr)
893 // change on output shape
894 output->applyShape(output_shape);
896 assert(output->buffer() != nullptr);
899 void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
901 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
904 void DynamicShapeInferer::visit(const ir::operation::Select &op)
906 const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
907 const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
909 const auto input_true_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE);
910 const auto &input_true = _tensor_registry->getITensor(input_true_idx);
912 const auto input_false_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE);
913 const auto &input_false = _tensor_registry->getITensor(input_false_idx);
915 if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
920 auto input_cond_shape = input_cond->getShape();
921 auto input_true_shape = input_true->getShape();
922 auto input_false_shape = input_false->getShape();
924 // Select output shpae
925 ir::Shape new_shape =
926 shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
928 auto output_ind = op.getOutputs().at(0);
929 auto output = _tensor_registry->getITensor(output_ind);
931 output->applyShape(new_shape);
932 assert(output->buffer() != nullptr);
935 void DynamicShapeInferer::visit(const ir::operation::Shape &op)
937 const auto input_idx{op.getInputs().at(0)};
938 const auto &input = _tensor_registry->getITensor(input_idx);
939 auto input_shape = input->getShape();
941 if (!input->is_dynamic())
944 auto output_ind = op.getOutputs().at(0);
945 auto output = _tensor_registry->getITensor(output_ind);
947 ir::Shape output_shape;
948 output_shape.append(input_shape.rank());
950 output->applyShape(output_shape);
951 assert(output->buffer() != nullptr);
954 void DynamicShapeInferer::visit(const ir::operation::Slice &op)
956 const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
957 const auto input = _tensor_registry->getITensor(input_index);
958 const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
959 const auto begins = _tensor_registry->getITensor(begins_index);
960 const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
961 const auto sizes = _tensor_registry->getITensor(sizes_index);
962 auto output_index = op.getOutputs().at(0);
963 auto output = _tensor_registry->getITensor(output_index);
965 if (!(input->is_dynamic() || begins->is_dynamic() || sizes->is_dynamic() || output->is_dynamic()))
970 ir::Shape input_shape = input->getShape();
971 auto begins_buf = reinterpret_cast<const int32_t *>(begins->buffer());
972 auto sizes_buf = reinterpret_cast<const int32_t *>(sizes->buffer());
974 ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
976 output->applyShape(new_shape);
977 assert(output->buffer() != nullptr);
980 void DynamicShapeInferer::visit(const ir::operation::Softmax &op)
982 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::INPUT));
985 void DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
987 const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
988 const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
989 const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
990 auto output_idx{op.getOutputs().at(0)};
992 const auto &input = _tensor_registry->getITensor(input_idx);
993 const auto &block_shape = _tensor_registry->getITensor(block_shape_idx);
994 const auto &padding = _tensor_registry->getITensor(padding_idx);
995 auto output = _tensor_registry->getITensor(output_idx);
997 if (!(input->is_dynamic() || block_shape->is_dynamic() || padding->is_dynamic() ||
998 output->is_dynamic()))
1003 auto input_shape = input->getShape();
1004 auto block_shape_shape = block_shape->getShape();
1005 auto padding_shape = padding->getShape();
1007 auto block_shape_data = reinterpret_cast<int32_t *>(block_shape->buffer());
1008 auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
1010 ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
1011 input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
1013 output->applyShape(new_shape);
1014 assert(output->buffer() != nullptr);
1017 void DynamicShapeInferer::visit(const ir::operation::Split &op)
1019 const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
1020 const auto &input = _tensor_registry->getITensor(input_idx);
1022 // Return if all tensors are not dynamic
1023 bool has_dynamic = false;
1024 for (const auto output_idx : op.getOutputs())
1026 auto output = _tensor_registry->getITensor(output_idx);
1027 has_dynamic |= output->is_dynamic();
1029 if (!input->is_dynamic() && !has_dynamic)
1034 auto input_shape = input->getShape();
1036 const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
1037 const auto &axis = _tensor_registry->getITensor(axis_idx);
1039 auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
1040 const auto num_splits = op.param().num_splits;
1041 const auto rank = input_shape.rank();
1042 axis_value = axis_value < 0 ? axis_value + rank : axis_value;
1044 assert(0 <= axis_value && axis_value < rank);
1046 ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_value, num_splits);
1047 for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
1049 auto output_ind = op.getOutputs().at(out_tensor_idx);
1050 auto output = _tensor_registry->getITensor(output_ind);
1052 output->applyShape(new_shape);
1053 assert(output->buffer() != nullptr);
1057 void DynamicShapeInferer::visit(const ir::operation::SquaredDifference &op)
1059 handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
1060 op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
1063 void DynamicShapeInferer::visit(const ir::operation::Squeeze &op)
1065 const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
1066 const auto &input = _tensor_registry->getITensor(input_idx);
1068 if (!input->is_dynamic())
1073 auto input_shape = input->getShape();
1075 // Squeeze output shpae
1076 ir::Shape new_shape = shape_inference::inferSqueezeShape(input_shape, op.param());
1078 auto output_ind = op.getOutputs().at(0);
1079 auto output = _tensor_registry->getITensor(output_ind);
1081 output->applyShape(new_shape);
1082 assert(output->buffer() != nullptr);
1085 void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
1088 const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
1089 auto input = _tensor_registry->getITensor(input_index);
1090 ir::Shape input_shape = input->getShape();
1092 const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
1093 auto starts = _tensor_registry->getITensor(starts_index);
1095 const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
1096 auto ends = _tensor_registry->getITensor(ends_index);
1098 const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
1099 auto strides = _tensor_registry->getITensor(strides_index);
1101 if (!(input->is_dynamic() || starts->is_dynamic() || ends->is_dynamic() || strides->is_dynamic()))
1106 const auto begin_mask = op.param().begin_mask;
1107 const auto end_mask = op.param().end_mask;
1108 const auto shrink_axis_mask = op.param().shrink_axis_mask;
1109 const auto rank = input_shape.rank();
1111 auto op_params = shape_inference::buildStridedSliceParams(
1112 reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
1113 reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask,
1116 auto output_index = op.getOutputs().at(0);
1117 auto output = _tensor_registry->getITensor(output_index);
1119 ir::Shape output_shape =
1120 onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
1122 output->applyShape(output_shape);
1123 assert(output->buffer() != nullptr);
1126 void DynamicShapeInferer::visit(const ir::operation::Tile &op)
1128 auto output_ind = op.getOutputs().at(0);
1129 auto output = _tensor_registry->getITensor(output_ind);
1131 auto input_idx = op.getInputs().at(ir::operation::Tile::Input::INPUT);
1132 auto input = _tensor_registry->getITensor(input_idx);
1134 auto multiplier_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
1135 auto multiplier = _tensor_registry->getITensor(multiplier_idx);
1137 if ((!input->is_dynamic()) && (!output->is_dynamic()))
1140 auto input_shape = input->getShape();
1141 auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
1142 assert(multiplier_buffer);
1145 shape_inference::inferTileShape(input_shape, multiplier_buffer, multiplier->dimension(0));
1147 // set output shape and output buffer
1148 output->applyShape(output_shape);
1149 assert(output->buffer() != nullptr);
1152 void DynamicShapeInferer::visit(const ir::operation::Transpose &op)
1154 // check if output is not dynamic
1155 auto output_ind = op.getOutputs().at(0);
1156 auto output = _tensor_registry->getITensor(output_ind);
1158 // from op, access the buffer of second input to read new shape
1159 auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT);
1160 auto input = _tensor_registry->getITensor(input_ind);
1161 auto input_shape = input->getShape();
1164 Here, the state after compilation (static shape inference) could be one of the following:
1166 input perms output execution-time shape inf required
1167 ------------------------------------ --------------------------------
1168 case 1) static const static X
1169 case 2) static non-const dynamic O
1170 case 3) dynamic const dynamic O
1171 case 4) dynamic non-const dynamic O
1173 So, only when both input1 and ouput are static, we can skip dynamic shape inference.
1175 if ((!input->is_dynamic()) && (!output->is_dynamic()))
1178 auto perm_ind = op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION);
1179 auto perm = _tensor_registry->getITensor(perm_ind);
1181 ir::Shape new_shape;
1182 // TODO Change perm->dimension(0) == 0 to perm->num_elements() == 0
1183 if (perm->dimension(0) == 0) // This condition means that perm is (n-1...0)
1185 // Call by (n-1...0)
1186 new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
1191 if (input->num_dimensions() != perm->getShape().num_elements())
1193 throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
1194 std::to_string(perm->getShape().num_elements()));
1197 // set output shape, based on input and params
1198 const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
1199 new_shape = shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->dimension(0));
1201 output->applyShape(new_shape);
1202 assert(output->buffer() != nullptr);
1205 void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
1207 // check if output is not dynamic
1208 const auto input_idx{op.getInputs().at(0)};
1209 const auto &input = _tensor_registry->getITensor(input_idx);
1211 if (!input->is_dynamic())
1214 auto input_shape = input->getShape();
1216 const auto rank = input_shape.rank();
1217 const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
1218 const auto num = op.param().num;
1220 assert(0 <= axis && axis < rank);
1222 ir::Shape new_shape = shape_inference::inferUnpackShape(input_shape, axis, rank);
1224 for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
1226 auto output_ind = op.getOutputs().at(out_tensor_idx);
1227 auto output = _tensor_registry->getITensor(output_ind);
1229 output->applyShape(new_shape);
1231 assert(output->buffer() != nullptr);
1236 } // namespace onert