/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "exec/DynamicShapeInferer.h"
18 #include "util/ShapeInference.h"
26 void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
27 const ir::OperandIndex lhs_idx,
28 const ir::OperandIndex rhs_idx)
30 auto lhs = _tensor_registry->getITensor(lhs_idx);
31 auto lhs_shape = lhs->getShape();
33 auto rhs = _tensor_registry->getITensor(rhs_idx);
34 auto rhs_shape = rhs->getShape();
37 Here, the state after compilation (satic shape inference) could be one of the following:
39 lhs rhs output execution-time shape inf required
40 ------------------------------------------ ---------------------------------
41 case 1) static static static X
42 case 2) one or both are dynamic dynamic O
44 Then nnfw_apply_tensorinf() could change one or both inputs dynamic.
45 So, in this method, we have one more state and we have to re-calculate shape for this shape.
47 case 3) one or both are dynamic static O
49 So, only when all inputs are static, we can skip dynamic shape inference.
51 auto output_idx = op.getOutputs().at(0);
52 auto output = _tensor_registry->getITensor(output_idx);
54 if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output))
57 ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
59 output->applyShape(new_shape);
60 assert(output->buffer() != nullptr);
63 void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
64 const ir::OperandIndex input_ind)
66 // check if input is not dynamic
67 auto input = _tensor_registry->getITensor(input_ind);
68 auto output_shape = input->getShape();
71 Here, the state after compilation (satic shape inference) could be one of the following:
73 input output execution-time shape inf required
74 ------------------------- ---------------------------------
75 case 1) static static X
76 case 2) dynamic dynamic O
78 Then nnfw_apply_tensorinf() could change input dynamic.
79 So, in this method, we have one more state and we have to re-calculate shape for this shape.
81 case 3) dynamic static O
83 So, only when input is static, we can skip dynamic shape inference.
85 if (!input->is_dynamic())
88 auto output_ind = op.getOutputs().at(0);
89 auto output = _tensor_registry->getITensor(output_ind);
91 output->applyShape(output_shape);
92 assert(output->buffer() != nullptr);
95 void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op)
97 const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
98 const auto input = _tensor_registry->getITensor(input_idx);
100 const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
101 const auto axis = _tensor_registry->getITensor(axis_idx);
103 auto output_ind = op.getOutputs().at(0);
104 auto output = _tensor_registry->getITensor(output_ind);
106 if (!input->is_dynamic() && !output->is_dynamic())
109 auto input_shape = input->getShape();
110 auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
111 const auto rank = input_shape.rank();
112 axis_value = axis_value < 0 ? axis_value + rank : axis_value;
114 ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank);
116 output->applyShape(new_shape);
117 assert(output->buffer() != nullptr);
120 void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
122 const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
123 const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
124 auto lhs = _tensor_registry->getITensor(lhs_index);
125 auto rhs = _tensor_registry->getITensor(rhs_index);
127 if (!lhs->is_dynamic() && !rhs->is_dynamic())
130 const auto output_index = op.getOutputs().at(0);
131 auto output = _tensor_registry->getITensor(output_index);
133 auto lhs_shape = lhs->getShape();
134 auto rhs_shape = rhs->getShape();
137 auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
138 output->applyShape(new_shape);
141 void DynamicShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
143 const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
144 const auto &input = _tensor_registry->getITensor(input_idx);
146 const auto cluster_idx{
147 op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
148 const auto &cluster = _tensor_registry->getITensor(cluster_idx);
149 assert(cluster->is_constant());
151 if (!input->is_dynamic())
154 auto input_shape = input->getShape();
155 auto cluster_shape = cluster->getShape();
157 auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
160 ir::Shape new_shape =
161 shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
163 auto output_ind = op.getOutputs().at(0);
164 auto output = _tensor_registry->getITensor(output_ind);
166 output->applyShape(new_shape);
167 assert(output->buffer() != nullptr);
170 void DynamicShapeInferer::visit(const ir::operation::BCQGather &op)
172 const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
173 const auto &indices = _tensor_registry->getITensor(indices_idx);
175 const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
176 const auto &input_binary = _tensor_registry->getITensor(input_binary_idx);
178 const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
179 const auto &cluster = _tensor_registry->getITensor(cluster_idx);
180 assert(cluster->is_constant());
182 if (!indices->is_dynamic())
185 auto indices_shape = indices->getShape();
186 auto cluster_shape = cluster->getShape();
187 auto rank = input_binary->getShape().rank();
189 auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
192 ir::Shape new_shape = shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
193 cluster_buf, rank, op.param());
195 auto output_ind = op.getOutputs().at(0);
196 auto output = _tensor_registry->getITensor(output_ind);
198 output->applyShape(new_shape);
199 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
{
  // Broadcasting binary arithmetic: delegate to the shared binary handler.
  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
                           op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
}
208 void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
210 auto output_ind = op.getOutputs().at(0);
211 auto output = _tensor_registry->getITensor(output_ind);
213 auto input_idx = op.getInputs().at(ir::operation::BroadcastTo::INPUT);
214 auto input = _tensor_registry->getITensor(input_idx);
216 if ((!input->is_dynamic()) && (!output->is_dynamic()))
219 auto shape_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
220 const auto &shape = _tensor_registry->getITensor(shape_idx);
222 assert(shape); // It shouldn't be 0.
224 auto output_shape = shape_inference::inferBroadcastToShape(
225 shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
227 // set output shape and output buffer
228 output->applyShape(output_shape);
229 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
{
  // Comparison broadcasts like a binary arithmetic op; reuse the shared handler.
  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
                           op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
}
238 void DynamicShapeInferer::visit(const ir::operation::Concat &op)
241 The state after compilation (satic shape inference) could be one of the following:
243 inputs output execution-time shape inf required
244 ------------------------------------------ ---------------------------------
245 case 1) all static static X
246 case 2) at least on is dynamic dynamic O
248 Then nnfw_apply_tensorinf() could change one or both inputs dynamic.
249 So, in this method, we have one more state and we have to re-calculate shape for this shape.
251 case 3) at least on is dynamic static O
253 So, only when all inputs are static, we can skip dynamic shape inference.
255 bool all_static = true;
256 for (auto input_ind : op.getInputs())
258 auto input = _tensor_registry->getITensor(input_ind);
259 if (input->is_dynamic())
271 auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
273 auto shape1 = input1->getShape();
274 auto shape2 = input2->getShape();
275 if (shape1.rank() != shape2.rank())
278 for (int i = 0; i < shape1.rank(); i++)
280 auto positive_axis = (axis >= 0) ? axis : axis + input1->getShape().rank();
282 if (i != positive_axis)
283 if (shape1.dim(i) != shape2.dim(i))
290 auto first_input_ind = op.getInputs().at(0);
291 auto first_input = _tensor_registry->getITensor(first_input_ind);
293 for (auto input_ind : op.getInputs())
295 auto input = _tensor_registry->getITensor(input_ind);
296 if (input != first_input && !isConcatible(first_input, input, op.param().axis))
297 throw std::runtime_error("input shapes does not matched for concat");
301 // getting output shape
302 onert::shape_inference::Shapes in_shapes;
303 for (auto input_ind : op.getInputs())
305 auto input = _tensor_registry->getITensor(input_ind);
306 ir::Shape shape = input->getShape();
308 in_shapes.emplace_back(shape);
311 auto output_ind = op.getOutputs().at(0);
312 auto output = _tensor_registry->getITensor(output_ind);
313 auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
315 output->applyShape(output_shape);
318 void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
320 // check if input is not dynamic
321 auto input_ind = op.getInputs().at(ir::operation::Conv2D::INPUT);
322 auto input = _tensor_registry->getITensor(input_ind);
324 auto ker_ind = op.getInputs().at(ir::operation::Conv2D::KERNEL);
325 auto ker = _tensor_registry->getITensor(ker_ind);
327 if ((!input->is_dynamic()) && (!ker->is_dynamic()))
330 ir::Shape input_shape = input->getShape();
331 ir::Shape ker_shape = ker->getShape();
333 auto output_ind = op.getOutputs().at(0);
334 auto output = _tensor_registry->getITensor(output_ind);
336 ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
338 output->applyShape(output_shape);
339 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
  // Activation preserves the input shape; reuse the simple unary handler.
  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
}
void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
  // Broadcasting element-wise binary op: delegate to the shared binary handler.
  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
                           op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
  // Element-wise unary op preserves the input shape; reuse the simple unary handler.
  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
358 void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
360 // check if input is not dynamic
361 auto input_ind = op.getInputs().at(ir::operation::ExpandDims::INPUT);
362 auto input = _tensor_registry->getITensor(input_ind);
364 // check if output is not dynamic, meaning when 1st input is static and 2nd input is const
365 auto output_ind = op.getOutputs().at(0);
366 auto output = _tensor_registry->getITensor(output_ind);
369 Here, the state after compilation (satic shape inference) could be one of the following:
371 input1 input2 output execution-time shape inf required
372 ----------------------------- --------------------------------
373 case 1) static const static X
374 case 2) static placeholder dynamic O
375 case 3) dynamic const dynamic O
376 case 4) dynamic placeholder dynamic O
378 Then nnfw_apply_tensorinf() could change input dynamic.
379 So, in this method, we could have one more state and we have to re-calculate shape
382 case 5) dynamic const static O
384 So, only when input1 and ouput are static, we can skip dynamic shape inference.
386 if ((!input->is_dynamic()) && (!output->is_dynamic()))
389 ir::Shape input_shape = input->getShape();
391 auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
392 auto axis = _tensor_registry->getITensor(axis_ind);
393 auto axis_type = axis->data_type();
394 assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
396 assert(axis->buffer());
398 (axis_type == ir::DataType::INT32)
399 ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
400 : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
402 auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
404 output->applyShape(output_shape);
405 assert(output->buffer() != nullptr);
408 void DynamicShapeInferer::visit(const ir::operation::Fill &op)
410 // check if output is not dynamic
411 auto output_ind = op.getOutputs().at(0);
412 auto output = _tensor_registry->getITensor(output_ind);
413 auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE);
414 auto shape = _tensor_registry->getITensor(shape_ind);
416 if ((!shape->is_dynamic()) && (!output->is_dynamic()))
419 const auto dims_type = shape->data_type();
420 assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
422 auto dims_buf = shape->buffer();
425 const auto &dims_shape = shape->getShape();
426 auto output_shape = ((dims_type == ir::DataType::INT32)
427 ? shape_inference::inferFillShape<int32_t>(
428 dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
429 : shape_inference::inferFillShape<int64_t>(
430 dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
432 output->applyShape(output_shape);
433 assert(output->buffer() != nullptr);
436 void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op)
438 const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
439 const auto &input = _tensor_registry->getITensor(input_idx);
441 const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
442 const auto &ker = _tensor_registry->getITensor(ker_idx);
444 if (!input->is_dynamic() && !ker->is_dynamic())
447 auto input_shape = input->getShape();
448 auto ker_shape = ker->getShape();
450 ir::Shape new_shape = shape_inference::inferFullyConnectedShape(input_shape, ker_shape);
452 auto output_ind = op.getOutputs().at(0);
453 auto output = _tensor_registry->getITensor(output_ind);
455 output->applyShape(new_shape);
456 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
{
  // FusedBatchNorm preserves the input shape; reuse the simple unary handler.
  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
}
464 void DynamicShapeInferer::visit(const ir::operation::Gather &op)
466 const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
467 const auto &input = _tensor_registry->getITensor(input_idx);
468 auto input_shape = input->getShape();
470 const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
471 const auto &indices = _tensor_registry->getITensor(indices_idx);
472 auto indices_shape = indices->getShape();
474 if (!(input->is_dynamic()) && !(indices->is_dynamic()))
477 const auto rank = input_shape.rank();
478 const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
480 assert(0 <= axis && axis < rank);
482 ir::Shape new_shape = shape_inference::inferGatherShape(input_shape, indices_shape, axis, rank);
484 auto output_ind = op.getOutputs().at(0);
485 auto output = _tensor_registry->getITensor(output_ind);
487 output->applyShape(new_shape);
488 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
{
  // L2Normalization preserves the input shape; reuse the simple unary handler.
  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
}
496 void DynamicShapeInferer::visit(const ir::operation::LSTM &op)
498 const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
499 auto output = _tensor_registry->getITensor(output_index);
501 const auto output_state_out_index{
502 op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
504 const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
506 const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
508 if (!output->is_dynamic() &&
509 !(_tensor_registry->getITensor(output_state_out_index) != nullptr &&
510 _tensor_registry->getITensor(output_state_out_index)->is_dynamic()) &&
511 !(_tensor_registry->getITensor(cell_state_out_index) != nullptr &&
512 _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()) &&
513 !(_tensor_registry->getITensor(scratch_buffer_index) != nullptr &&
514 _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()))
517 const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
518 const auto input = _tensor_registry->getITensor(input_index);
519 const auto input_shape = input->getShape();
521 const auto input_to_output_weights_index{
522 op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
523 const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
524 const auto input_to_output_weights_shape = input_to_output_weights->getShape();
526 const auto recurrent_to_output_weights_index{
527 op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
528 const auto recurrent_to_output_weights =
529 _tensor_registry->getITensor(recurrent_to_output_weights_index);
530 const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
534 (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
535 const int n_cell = input_to_output_weights_shape.dim(0);
536 const int n_output = recurrent_to_output_weights_shape.dim(1);
537 if (input_shape.rank() == 3)
539 if (op.param().time_major)
540 output->applyShape(ir::Shape{input_shape.dim(0), n_batch, n_output});
542 output->applyShape(ir::Shape{n_batch, input_shape.dim(1), n_output});
546 assert(input_shape.rank() == 2);
547 output->applyShape(ir::Shape{n_batch, n_output});
549 assert(output->buffer() != nullptr);
551 auto output_state_out = _tensor_registry->getITensor(output_state_out_index);
552 if (output_state_out != nullptr)
554 output_state_out->applyShape(ir::Shape{n_batch, n_output});
555 assert(output_state_out->buffer() != nullptr);
558 auto cell_state_out = _tensor_registry->getITensor(cell_state_out_index);
559 if (cell_state_out != nullptr)
561 cell_state_out->applyShape(ir::Shape{n_batch, n_cell});
562 assert(cell_state_out->buffer() != nullptr);
565 auto scratch_buffer = _tensor_registry->getITensor(scratch_buffer_index);
566 if (scratch_buffer != nullptr)
568 const auto input_to_input_weights_index{
569 op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
570 const auto recurrent_to_input_weights_index{
571 op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
573 const auto input_to_input_weights_shape =
574 _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
575 bool has_input_to_input_weights =
576 input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
578 const auto recurrent_to_input_weights_shape =
579 _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
580 bool has_recurrent_to_input_weights =
581 recurrent_to_input_weights_shape.dim(0) != 0 && recurrent_to_input_weights_shape.dim(1) != 0;
583 // NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
586 bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
589 scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 4});
593 scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 3});
595 assert(scratch_buffer->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
  // MatrixBandPart preserves the input shape; reuse the simple unary handler.
  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
}
void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */)
{
  // NOTE DetectionPostProcess's undefined outputs' shape are decided on compile time
  // by static shape inferer.
  // DetectionPostProcess's outputs' shape are independent with input shape
  // and decided by parameter value.
}
612 void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
614 auto output_ind = op.getOutputs().at(0);
615 auto output = _tensor_registry->getITensor(output_ind);
617 auto indices_ind = op.getInputs().at(ir::operation::OneHot::INDICES);
618 const auto &indices = _tensor_registry->getITensor(indices_ind);
619 auto indices_shape = indices->getShape();
621 auto depth_ind = op.getInputs().at(ir::operation::OneHot::DEPTH);
622 const auto &depth = _tensor_registry->getITensor(depth_ind);
624 if (!indices->is_dynamic() && !depth->is_dynamic())
629 int32_t *depth_buf = reinterpret_cast<int32_t *>(depth->buffer());
631 const auto axis_val = op.param().axis;
633 ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
634 output->applyShape(new_shape);
635 assert(output->buffer() != nullptr);
638 void DynamicShapeInferer::visit(const ir::operation::Pack &op)
640 bool is_any_of_inputs_dynamic = [&]() -> bool {
641 for (uint32_t i = 0; i < op.getInputs().size(); ++i)
643 const auto &input = _tensor_registry->getITensor(op.getInputs().at(i));
644 if (input->is_dynamic())
652 const auto input_idx{op.getInputs().at(0)};
653 const auto &input = _tensor_registry->getITensor(input_idx);
654 auto input_shape = input->getShape();
656 auto output_ind = op.getOutputs().at(0);
657 auto output = _tensor_registry->getITensor(output_ind);
659 if (!is_any_of_inputs_dynamic && !output->is_dynamic())
662 const auto rank = input_shape.rank() + 1;
663 const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
664 const auto num = op.param().num;
666 assert(0 <= axis && axis < rank);
668 ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
670 output->applyShape(new_shape);
671 assert(output->buffer() != nullptr);
674 void DynamicShapeInferer::visit(const ir::operation::Pad &op)
676 // check if output is not dynamic
677 auto output_ind = op.getOutputs().at(0);
678 auto output = _tensor_registry->getITensor(output_ind);
680 auto input_ind = op.getInputs().at(ir::operation::Pad::Input::INPUT);
681 auto input = _tensor_registry->getITensor(input_ind);
683 auto pad_ind = op.getInputs().at(ir::operation::Pad::Input::PAD);
684 auto pad = _tensor_registry->getITensor(pad_ind);
686 // check if input and output are not dynamic
687 if ((!input->is_dynamic()) && (!output->is_dynamic()))
690 int32_t *pad_buf = reinterpret_cast<int32_t *>(pad->buffer());
694 shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
696 // change output shape and reallocate output tensor memory
697 output->applyShape(output_shape);
698 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::Permute & /* op */)
{
  // NOTE Permute is a special operation which does not do shape inference before the actual
  // function(kernel) execution. Shape inference and output allocation will be done in the kernel
  // on-the-fly, as it must support inter-backend inference/allocation.
}
void DynamicShapeInferer::visit(const ir::operation::Pow &op)
{
  // Pow broadcasts like a binary arithmetic op; reuse the shared handler.
  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
                           op.getInputs().at(ir::operation::Pow::Input::RHS));
}
714 void DynamicShapeInferer::visit(const ir::operation::Range &op)
716 // check if output is not dynamic
717 auto output_ind = op.getOutputs().at(0);
718 auto output = _tensor_registry->getITensor(output_ind);
720 // from op, access the buffer of second input to read new shape
721 auto start_idx = op.getInputs().at(ir::operation::Range::Input::START);
722 auto start_tensor = _tensor_registry->getITensor(start_idx);
724 auto limit_idx = op.getInputs().at(ir::operation::Range::Input::LIMIT);
725 auto limit_tensor = _tensor_registry->getITensor(limit_idx);
727 auto delta_idx = op.getInputs().at(ir::operation::Range::Input::DELTA);
728 auto delta_tensor = _tensor_registry->getITensor(delta_idx);
730 if (!start_tensor->is_dynamic() && !limit_tensor->is_dynamic() && !delta_tensor->is_dynamic() &&
731 !output->is_dynamic())
735 if (output->data_type() == ir::DataType::FLOAT32)
738 shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
739 *reinterpret_cast<float *>(limit_tensor->buffer()),
740 *reinterpret_cast<float *>(delta_tensor->buffer()));
742 else if (output->data_type() == ir::DataType::INT32)
744 new_shape = shape_inference::inferRangeShape<int32_t>(
745 *reinterpret_cast<int32_t *>(start_tensor->buffer()),
746 *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
747 *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
749 output->applyShape(new_shape);
750 assert(output->buffer() != nullptr);
753 void DynamicShapeInferer::visit(const ir::operation::Reduce &op)
755 const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
756 const auto &input = _tensor_registry->getITensor(input_idx);
757 auto input_shape = input->getShape();
759 const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
760 const auto &axes = _tensor_registry->getITensor(axes_idx);
762 if (!input->is_dynamic())
765 std::vector<int32_t> axes_vec;
766 for (uint32_t i = 0; i < axes->getShape().num_elements(); ++i)
768 const auto buffer = axes->buffer() + axes->calcOffset({i});
769 switch (axes->data_type())
771 case ir::DataType::INT32:
773 axes_vec.emplace_back(*reinterpret_cast<const int32_t *>(buffer));
776 case ir::DataType::INT64:
778 axes_vec.emplace_back(*reinterpret_cast<const int64_t *>(buffer));
782 throw std::runtime_error("DynamicShapeInferer " + op.name() + ": Not supported data type");
786 const auto keep_dims = op.param().keep_dims;
788 auto output_ind = op.getOutputs().at(0);
789 auto output = _tensor_registry->getITensor(output_ind);
791 ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
793 output->applyShape(new_shape);
794 assert(output->buffer() != nullptr);
797 void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
799 // check if output is not dynamic
800 auto output_ind = op.getOutputs().at(0);
801 auto output = _tensor_registry->getITensor(output_ind);
803 auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
804 auto input = _tensor_registry->getITensor(input_ind);
807 Here, the state after compilation (satic shape inference) could be one of the following:
809 input1 input2 (or option) output execution-time shape inf required
810 ------------------------------------ --------------------------------
811 case 1) static const static X
812 case 2) static placeholder dynamic O
813 case 3) dynamic const dynamic O
814 case 4) dynamic placeholder dynamic O
816 Then nnfw_apply_tensorinf() could change input dynamic.
817 So, in this method, we could have one more state and we have to re-calculate shape
820 case 5) dynamic const static O
822 So, only when both input1 and ouput are static, we can skip dynamic shape inference.
824 if ((!input->is_dynamic()) && (!output->is_dynamic()))
827 // New shape is given by second input tensor
828 if (op.getInputs().size() == 2)
830 // from op, access the buffer of second input to read new shape
831 auto new_shape_ind = op.getInputs().at(ir::operation::Reshape::Input::SHAPE);
833 // getting output shape by reading new_shape tensor buffer
834 auto new_shape = _tensor_registry->getITensor(new_shape_ind);
837 int32_t *new_shape_buf = reinterpret_cast<int32_t *>(new_shape->buffer());
838 assert(new_shape_buf);
840 auto output_shape = shape_inference::inferReshapeShape(
841 new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
843 // if shape is changed, change output shape and reallocate output tensor memory
844 if (output_shape != output->getShape() || output->buffer() == nullptr)
846 // change on output shape
847 output->applyShape(output_shape);
849 assert(output->buffer() != nullptr);
851 // New shape is given by option
852 else if (op.param().new_shape.size() != 0)
854 // Let's check the new_shape option
855 auto shape = op.param().new_shape;
856 auto output_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
857 input->getShape().num_elements());
859 // if shape is changed, change output shape and reallocate output tensor memory
860 if (output_shape != output->getShape() || output->buffer() == nullptr)
862 // change on output shape
863 output->applyShape(output_shape);
865 assert(output->buffer() != nullptr);
869 throw std::runtime_error("Reshape: new shape is missing");
874 void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
876 // check if output is not dynamic
877 auto output_ind = op.getOutputs().at(0);
878 auto output = _tensor_registry->getITensor(output_ind);
880 auto input_ind = op.getInputs().at(ir::operation::Reshape::Input::INPUT);
881 auto input = _tensor_registry->getITensor(input_ind);
883 if ((!input->is_dynamic()) && (!output->is_dynamic()))
886 // getting output shape from input shape and Params
887 int32_t height_out, width_out;
888 if (op.getInputs().size() == 2)
890 auto size_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE);
891 auto size = _tensor_registry->getITensor(size_ind);
892 if (size->data_type() == ir::DataType::INT32)
894 auto size_buf = reinterpret_cast<const int32_t *>(size->buffer());
895 height_out = size_buf[0];
896 width_out = size_buf[1];
900 throw std::runtime_error("DynamicShapeInferer ResizeBilinear : Unsupported data type");
905 height_out = op.param().height_out;
906 width_out = op.param().width_out;
909 shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
911 // if shape is changed, change output shape and reallocate output tensor memory
912 if (output_shape != output->getShape() || output->buffer() == nullptr)
914 // change on output shape
915 output->applyShape(output_shape);
917 assert(output->buffer() != nullptr);
void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
{
  // Reverse preserves the input shape; reuse the simple unary handler.
  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
}
925 void DynamicShapeInferer::visit(const ir::operation::Select &op)
927 const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
928 const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
930 const auto input_true_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE);
931 const auto &input_true = _tensor_registry->getITensor(input_true_idx);
933 const auto input_false_idx = op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE);
934 const auto &input_false = _tensor_registry->getITensor(input_false_idx);
936 if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
941 auto input_cond_shape = input_cond->getShape();
942 auto input_true_shape = input_true->getShape();
943 auto input_false_shape = input_false->getShape();
945 // Select output shpae
946 ir::Shape new_shape =
947 shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
949 auto output_ind = op.getOutputs().at(0);
950 auto output = _tensor_registry->getITensor(output_ind);
952 output->applyShape(new_shape);
953 assert(output->buffer() != nullptr);
956 void DynamicShapeInferer::visit(const ir::operation::Shape &op)
958 const auto input_idx{op.getInputs().at(0)};
959 const auto &input = _tensor_registry->getITensor(input_idx);
960 auto input_shape = input->getShape();
962 if (!input->is_dynamic())
965 auto output_ind = op.getOutputs().at(0);
966 auto output = _tensor_registry->getITensor(output_ind);
968 ir::Shape output_shape;
969 output_shape.append(input_shape.rank());
971 output->applyShape(output_shape);
972 assert(output->buffer() != nullptr);
975 void DynamicShapeInferer::visit(const ir::operation::Slice &op)
977 const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
978 const auto input = _tensor_registry->getITensor(input_index);
979 const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
980 const auto begins = _tensor_registry->getITensor(begins_index);
981 const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
982 const auto sizes = _tensor_registry->getITensor(sizes_index);
983 auto output_index = op.getOutputs().at(0);
984 auto output = _tensor_registry->getITensor(output_index);
986 if (!(input->is_dynamic() || begins->is_dynamic() || sizes->is_dynamic() || output->is_dynamic()))
991 ir::Shape input_shape = input->getShape();
992 auto begins_buf = reinterpret_cast<const int32_t *>(begins->buffer());
993 auto sizes_buf = reinterpret_cast<const int32_t *>(sizes->buffer());
995 ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
997 output->applyShape(new_shape);
998 assert(output->buffer() != nullptr);
1001 void DynamicShapeInferer::visit(const ir::operation::Softmax &op)
1003 handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::INPUT));
1006 void DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
1008 const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
1009 const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
1010 const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
1011 auto output_idx{op.getOutputs().at(0)};
1013 const auto &input = _tensor_registry->getITensor(input_idx);
1014 const auto &block_shape = _tensor_registry->getITensor(block_shape_idx);
1015 const auto &padding = _tensor_registry->getITensor(padding_idx);
1016 auto output = _tensor_registry->getITensor(output_idx);
1018 if (!(input->is_dynamic() || block_shape->is_dynamic() || padding->is_dynamic() ||
1019 output->is_dynamic()))
1024 auto input_shape = input->getShape();
1025 auto block_shape_shape = block_shape->getShape();
1026 auto padding_shape = padding->getShape();
1028 auto block_shape_data = reinterpret_cast<int32_t *>(block_shape->buffer());
1029 auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
1031 ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
1032 input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
1034 output->applyShape(new_shape);
1035 assert(output->buffer() != nullptr);
1038 void DynamicShapeInferer::visit(const ir::operation::Split &op)
1040 const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
1041 const auto &input = _tensor_registry->getITensor(input_idx);
1043 // Return if all tensors are not dynamic
1044 bool has_dynamic = false;
1045 for (const auto output_idx : op.getOutputs())
1047 auto output = _tensor_registry->getITensor(output_idx);
1048 has_dynamic |= output->is_dynamic();
1050 if (!input->is_dynamic() && !has_dynamic)
1055 auto input_shape = input->getShape();
1057 const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
1058 const auto &axis = _tensor_registry->getITensor(axis_idx);
1060 auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
1061 const auto num_splits = op.param().num_splits;
1062 const auto rank = input_shape.rank();
1063 axis_value = axis_value < 0 ? axis_value + rank : axis_value;
1065 assert(0 <= axis_value && axis_value < rank);
1067 ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_value, num_splits);
1068 for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
1070 auto output_ind = op.getOutputs().at(out_tensor_idx);
1071 auto output = _tensor_registry->getITensor(output_ind);
1073 output->applyShape(new_shape);
1074 assert(output->buffer() != nullptr);
1078 void DynamicShapeInferer::visit(const ir::operation::SquaredDifference &op)
1080 handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
1081 op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
1084 void DynamicShapeInferer::visit(const ir::operation::Squeeze &op)
1086 const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
1087 const auto &input = _tensor_registry->getITensor(input_idx);
1089 if (!input->is_dynamic())
1094 auto input_shape = input->getShape();
1096 // Squeeze output shpae
1097 ir::Shape new_shape = shape_inference::inferSqueezeShape(input_shape, op.param());
1099 auto output_ind = op.getOutputs().at(0);
1100 auto output = _tensor_registry->getITensor(output_ind);
1102 output->applyShape(new_shape);
1103 assert(output->buffer() != nullptr);
1106 void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
1109 const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
1110 auto input = _tensor_registry->getITensor(input_index);
1111 ir::Shape input_shape = input->getShape();
1113 const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
1114 auto starts = _tensor_registry->getITensor(starts_index);
1116 const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
1117 auto ends = _tensor_registry->getITensor(ends_index);
1119 const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
1120 auto strides = _tensor_registry->getITensor(strides_index);
1122 if (!(input->is_dynamic() || starts->is_dynamic() || ends->is_dynamic() || strides->is_dynamic()))
1127 const auto begin_mask = op.param().begin_mask;
1128 const auto end_mask = op.param().end_mask;
1129 const auto shrink_axis_mask = op.param().shrink_axis_mask;
1130 const auto rank = input_shape.rank();
1132 auto op_params = shape_inference::buildStridedSliceParams(
1133 reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
1134 reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, rank);
1136 auto output_index = op.getOutputs().at(0);
1137 auto output = _tensor_registry->getITensor(output_index);
1139 ir::Shape output_shape =
1140 onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
1142 output->applyShape(output_shape);
1143 assert(output->buffer() != nullptr);
1146 void DynamicShapeInferer::visit(const ir::operation::Tile &op)
1148 auto output_ind = op.getOutputs().at(0);
1149 auto output = _tensor_registry->getITensor(output_ind);
1151 auto input_idx = op.getInputs().at(ir::operation::Tile::Input::INPUT);
1152 auto input = _tensor_registry->getITensor(input_idx);
1154 auto multiplier_idx = op.getInputs().at(ir::operation::Tile::Input::MULTIPLES);
1155 auto multiplier = _tensor_registry->getITensor(multiplier_idx);
1157 if ((!input->is_dynamic()) && (!output->is_dynamic()))
1160 auto input_shape = input->getShape();
1161 auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
1162 assert(multiplier_buffer);
1164 auto mult_shape = multiplier->getShape();
1165 auto output_shape = shape_inference::inferTileShape(
1166 input_shape, multiplier_buffer, mult_shape.rank() == 0 ? 1 : mult_shape.dim(0));
1168 // set output shape and output buffer
1169 output->applyShape(output_shape);
1170 assert(output->buffer() != nullptr);
1173 void DynamicShapeInferer::visit(const ir::operation::Transpose &op)
1175 // check if output is not dynamic
1176 auto output_ind = op.getOutputs().at(0);
1177 auto output = _tensor_registry->getITensor(output_ind);
1179 // from op, access the buffer of second input to read new shape
1180 auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT);
1181 auto input = _tensor_registry->getITensor(input_ind);
1182 auto input_shape = input->getShape();
1185 Here, the state after compilation (static shape inference) could be one of the following:
1187 input perms output execution-time shape inf required
1188 ------------------------------------ --------------------------------
1189 case 1) static const static X
1190 case 2) static non-const dynamic O
1191 case 3) dynamic const dynamic O
1192 case 4) dynamic non-const dynamic O
1194 So, only when both input1 and ouput are static, we can skip dynamic shape inference.
1196 if ((!input->is_dynamic()) && (!output->is_dynamic()))
1199 auto perm_ind = op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION);
1200 auto perm = _tensor_registry->getITensor(perm_ind);
1202 ir::Shape new_shape;
1203 // TODO Change perm->dimension(0) == 0 to perm->num_elements() == 0
1204 if (perm->getShape().dim(0) == 0) // This condition means that perm is (n-1...0)
1206 // Call by (n-1...0)
1207 new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
1212 if (static_cast<size_t>(input->getShape().rank()) != perm->getShape().num_elements())
1214 throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
1215 std::to_string(perm->getShape().num_elements()));
1218 // set output shape, based on input and params
1219 const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
1221 shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->getShape().dim(0));
1223 output->applyShape(new_shape);
1224 assert(output->buffer() != nullptr);
1227 void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
1229 // check if output is not dynamic
1230 const auto input_idx{op.getInputs().at(0)};
1231 const auto &input = _tensor_registry->getITensor(input_idx);
1233 if (!input->is_dynamic())
1236 auto input_shape = input->getShape();
1238 const auto rank = input_shape.rank();
1239 const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
1240 const auto num = op.param().num;
1242 assert(0 <= axis && axis < rank);
1244 ir::Shape new_shape = shape_inference::inferUnpackShape(input_shape, axis, rank);
1246 for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
1248 auto output_ind = op.getOutputs().at(out_tensor_idx);
1249 auto output = _tensor_registry->getITensor(output_ind);
1251 output->applyShape(new_shape);
1253 assert(output->buffer() != nullptr);
1258 } // namespace onert