2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "luci/Service/CircleShapeInferenceRule.h"
20 #include "ShapeInfer_StridedSlice.h"
22 #include <luci/IR/CircleNodes.h>
23 #include <luci/IR/CircleDialect.h>
24 #include <luci/IR/CircleNodeVisitor.h>
27 #include <oops/InternalExn.h>
// Debug helper: streams each dimension value of a TensorShape in rank order
// (used by INFO logging in the visitors below).
std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape)
// iterate dimensions [0, rank)
for (uint32_t r = 0; r < tensor_shape.rank(); ++r)
os << tensor_shape.dim(r).value();
// Call this for CircleAvgPool2D and CircleMaxPool2D only
// Computes the NHWC output shape of a 2D pooling op from the input (value())
// shape, stride, filter window and padding mode.
template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
assert(ifm_shape.rank() == 4);
// Input feature map is NHWC: dim(1) = height, dim(2) = width
uint32_t input_height = ifm_shape.dim(1).value();
uint32_t input_width = ifm_shape.dim(2).value();
uint32_t stride_height = node->stride()->h();
uint32_t stride_width = node->stride()->w();
uint32_t window_height = node->filter()->h();
uint32_t window_width = node->filter()->w();
uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
uint32_t dilation_width = 1;
uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
uint32_t output_height = 0;
uint32_t output_width = 0;
// VALID: floor((in - window) / stride) + 1, written in unsigned arithmetic
if (node->padding() == luci::Padding::VALID)
output_height = (input_height + stride_height - effective_window_height) / stride_height;
output_width = (input_width + stride_width - effective_window_width) / stride_width;
// SAME: ceil(in / stride)
else if (node->padding() == luci::Padding::SAME)
output_height = (input_height + stride_height - 1) / stride_height;
output_width = (input_width + stride_width - 1) / stride_width;
LUCI_ASSERT(false, "Wrong padding type");
// Batch and channel dimensions pass through unchanged
loco::TensorShape ofm_shape;
ofm_shape.dim(0) = ifm_shape.dim(0);
ofm_shape.dim(1) = output_height;
ofm_shape.dim(2) = output_width;
ofm_shape.dim(3) = ifm_shape.dim(3);
return loco::NodeShape{ofm_shape};
* @brief Create a higher-rank TensorShape following NumPy broadcasting semantics
* auto expanded_tensor_shape = expand(tensor_shape).to(N);
class TensorShapeExpander
// Wraps a shape by value; to(N) left-pads it with 1s up to rank N
TensorShapeExpander(const loco::TensorShape &shape) : _shape{shape}
loco::TensorShape to(uint32_t output_rank)
auto const &input_shape = _shape;
uint32_t const input_rank = input_shape.rank();
// Expansion only grows the rank; shrinking is a caller bug
assert(input_rank <= output_rank && "Cannot shrink rank");
uint32_t const axis_shift = output_rank - input_rank;
loco::TensorShape output_shape;
output_shape.rank(output_rank);
// New leading axes get dimension 1; existing axes shift right by axis_shift
for (uint32_t axis = 0; axis < output_rank; ++axis)
output_shape.dim(axis) = (axis < axis_shift) ? 1 : input_shape.dim(axis - axis_shift);
const loco::TensorShape _shape;
* @brief Expand shapes x and y to the same rank by aligning right and filling with 1
void expand_rank(loco::TensorShape &x, loco::TensorShape &y)
auto x_rank = x.rank();
auto y_rank = y.rank();
// Nothing to do when ranks already match
if (x_rank == y_rank)
TensorShapeExpander x_exp(x);
TensorShapeExpander y_exp(y);
auto xy_rank = std::max(x_rank, y_rank);
// Only the lower-rank operand is actually expanded
x = x_rank > y_rank ? x : x_exp.to(xy_rank);
y = y_rank > x_rank ? y : y_exp.to(xy_rank);
* @brief Returns shape of expanded dimensions of inputs x and y having the same rank
loco::TensorShape expand_dimension(const loco::TensorShape &x, const loco::TensorShape &y)
assert(x.rank() == y.rank());
auto rank = x.rank();
loco::TensorShape output_shape;
output_shape.rank(rank);
for (uint32_t axis = 0; axis < rank; ++axis)
assert(x.dim(axis).known() && y.dim(axis).known());
auto x_dim = x.dim(axis).value();
auto y_dim = y.dim(axis).value();
// each dimension of x and y should be same or one must be 1 if different
if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
INTERNAL_EXN("Cannot produce expand_dimension of two shapes");
// broadcast rule: the non-1 dimension wins
output_shape.dim(axis) = std::max(x_dim, y_dim);
// NumPy-style broadcast of two shapes: expand both to equal rank, then
// broadcast each dimension pair via expand_dimension.
loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::TensorShape &y)
expand_rank(x_match, y_match);
auto output_shape = expand_dimension(x_match, y_match);
// BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
// TODO Distinguish BatchMatMul and BatchMatMulV2
loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
const loco::TensorShape &y_shape, bool adj_x, bool adj_y)
uint32_t x_rank = x_shape.rank();
uint32_t y_rank = y_shape.rank();
assert(x_rank >= 2 && y_rank >= 2);
loco::TensorShape output_shape;
output_shape.rank(x_shape.rank());
// Broadcast in the batch dimension
if (x_rank > 2 || y_rank > 2)
loco::TensorShape dummy_x = x_shape;
loco::TensorShape dummy_y = y_shape;
expand_rank(dummy_x, dummy_y);
expand_rank(output_shape, dummy_y);
// batch dims must be equal, or one of them 1 (then the other wins)
for (uint32_t d = 0; d < output_shape.rank() - 2; d++)
uint32_t max_dim = std::max(dummy_x.dim(d).value(), dummy_y.dim(d).value());
if (dummy_x.dim(d) == dummy_y.dim(d) ||
dummy_x.dim(d).value() * dummy_y.dim(d).value() == max_dim)
output_shape.dim(d).set(max_dim);
INTERNAL_EXN("BatchMatMul has wrong shape");
// adj_x/adj_y transpose the trailing two axes of x/y respectively
loco::Dimension x_lhs = adj_x ? x_shape.dim(x_rank - 1) : x_shape.dim(x_rank - 2);
loco::Dimension x_rhs = adj_x ? x_shape.dim(x_rank - 2) : x_shape.dim(x_rank - 1);
loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
// contracted dimensions must agree
if (not(x_rhs == y_lhs))
INTERNAL_EXN("x_rhs and y_lhs should be same");
// result matrix part is [..., x_lhs, y_rhs]
uint32_t out_rank = output_shape.rank();
output_shape.dim(out_rank - 2) = x_lhs;
output_shape.dim(out_rank - 1) = y_rhs;
return loco::NodeShape{output_shape};
// Build a TensorShape from the dimensions the node itself already carries;
// fallback used when inference from inputs is not possible.
loco::TensorShape own_shape(const luci::CircleNode *node)
loco::TensorShape shape;
shape.rank(node->rank());
for (uint32_t r = 0; r < node->rank(); ++r)
shape.dim(r) = loco::Dimension(node->dim(r).value());
// Shape inference shared by reduction ops (Mean, Sum, ...): axes listed in
// `indices` are kept as 1 when keep_dims is set, otherwise removed.
loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
const loco::DataType S32 = loco::DataType::S32;
auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
// TODO support non-const case
// TODO support other data type
LUCI_ASSERT(reduction_indices->dtype() == S32, "Only support int 32");
// Collect reduction axes, normalized to [0, rank)
std::vector<int32_t> reduction_values;
for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
int32_t axis = reduction_indices->at<S32>(i);
// negative axes count from the back
axis += input_shape.rank();
if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
reduction_values.push_back(axis);
loco::TensorShape output_shape;
// keep_dims: rank preserved, each reduced axis becomes 1
output_shape.rank(input_shape.rank());
for (uint32_t i = 0; i < input_shape.rank(); ++i)
output_shape.dim(i) = input_shape.dim(i);
for (uint32_t i = 0; i < reduction_values.size(); ++i)
output_shape.dim(reduction_values.at(i)) = 1;
// !keep_dims: reduced axes are dropped from the output entirely
std::vector<bool> check_reduce(input_shape.rank(), false);
for (uint32_t i = 0; i < reduction_values.size(); ++i)
check_reduce.at(reduction_values.at(i)) = true;
uint32_t reduce_cnt = 0;
for (uint32_t i = 0; i < check_reduce.size(); ++i)
if (check_reduce.at(i))
output_shape.rank(input_shape.rank() - reduce_cnt);
for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
if (check_reduce.at(i) == false)
output_shape.dim(j++) = input_shape.dim(i);
* @brief vector_from_constant will return int64_t vector from CircleConst node
template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node)
std::vector<int64_t> result;
// widen every element of the constant to int64_t
for (uint32_t idx = 0; idx < const_node->size<T>(); ++idx)
result.push_back(const_node->at<T>(idx));
318 template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
320 auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
321 auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
323 auto output_shape = broadcast_shape(x_shape, y_shape);
325 return loco::NodeShape{output_shape};
328 template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
330 auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
331 return loco::NodeShape{x_shape};
334 template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
336 auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
337 return loco::NodeShape{shape};
340 loco::NodeShape use_own(const luci::CircleNode *node)
342 loco::TensorShape shape = own_shape(node);
343 return loco::NodeShape{shape};
347 * @brief Class to infer the shape of CircleNode
349 * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
351 class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
354 loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
356 loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
// AddN: every input must have exactly the same shape; output is that shape.
loco::NodeShape visit(const luci::CircleAddN *node) final
auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
// verify all remaining inputs against the first
for (uint32_t idx = 1; idx < node->arity(); ++idx)
auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
if (!(shape == shape_idx))
INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
return loco::NodeShape{shape};
// ArgMax: output is the input shape with the selected axis removed.
loco::NodeShape visit(const luci::CircleArgMax *node) final
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
int64_t select_axis = 0;
LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
// Only support node's shape() is CircleConst with S32/S64
// Support S32 for now.
auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
"Only support int32 CircleConst for CircleArgMax");
// axis must come from a scalar (rank 0/1) constant
if (const_shape_node->rank() > 1)
INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
oops::to_uint32(const_shape_node->rank()));
select_axis = const_shape_node->scalar<loco::DataType::S32>();
assert(select_axis < input_shape.rank());
assert(select_axis >= 0); // TODO support minus of this breaks
// NOTE select_axis is removed
loco::TensorShape shape_output;
uint32_t rank = input_shape.rank();
uint32_t shrink = static_cast<uint32_t>(select_axis);
shape_output.rank(rank - 1);
// copy every dimension except the reduced axis `shrink`
for (uint32_t r = 0, d = 0; r < rank; ++r)
shape_output.dim(d++) = input_shape.dim(r);
return loco::NodeShape{shape_output};
// ArgMin: identical shape rule to ArgMax — the selected axis is removed.
loco::NodeShape visit(const luci::CircleArgMin *node) final
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
int64_t select_axis = 0;
LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
// Only support node's shape() is CircleConst with S32/S64
// Support S32 for now.
auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
"Only support int32 CircleConst for CircleArgMin");
// axis must come from a scalar (rank 0/1) constant
if (const_shape_node->rank() > 1)
INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
oops::to_uint32(const_shape_node->rank()));
select_axis = const_shape_node->scalar<loco::DataType::S32>();
assert(select_axis < input_shape.rank());
assert(select_axis >= 0); // TODO support minus of this breaks
// NOTE select_axis is removed
loco::TensorShape shape_output;
uint32_t rank = input_shape.rank();
uint32_t shrink = static_cast<uint32_t>(select_axis);
shape_output.rank(rank - 1);
// copy every dimension except the reduced axis `shrink`
for (uint32_t r = 0, d = 0; r < rank; ++r)
shape_output.dim(d++) = input_shape.dim(r);
return loco::NodeShape{shape_output};
452 loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
454 return infer_pool_2d_shape(node);
457 loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
459 auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
460 auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
462 return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
// BatchToSpaceND: batch is divided by the product of block sizes; each spatial
// dim is multiplied by its block size and then reduced by its crop amounts.
loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
const loco::DataType S32 = loco::DataType::S32;
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
// Support only input rank is 3 and 4
assert(input_shape.rank() == 3 || input_shape.rank() == 4);
// Only support block_shape() with S32 type CircleConst for now
auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
"Only support int32 block_shape");
// Only support crops() with S32 type CircleConst for now
auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
assert(const_block_shape_shape.rank() == 1);
assert(const_crops_shape.rank() == 2);
// spatial dims exclude leading batch and trailing channel
int32_t input_spatial_dim = input_shape.rank() - 2;
assert(const_block_shape_shape.dim(0) == input_spatial_dim);
assert(const_crops_shape.dim(0) == input_spatial_dim);
assert(const_crops_shape.dim(1) == 2); // [begin, end] crop per spatial dim
loco::TensorShape shape_output;
shape_output.rank(input_shape.rank());
int32_t output_batch_size = input_shape.dim(0).value();
for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
// spatial size grows by block factor, shrinks by both crops
int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
dim_size -= const_crops->at<S32>(dim * 2);
dim_size -= const_crops->at<S32>(dim * 2 + 1);
shape_output.dim(dim + 1) = dim_size;
// batch must be evenly divisible by each block factor
assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
shape_output.dim(0) = output_batch_size;
shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
return loco::NodeShape{shape_output};
513 loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
515 loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
// Concatenation: dims along `axis` are summed over all inputs; every other
// dim must match the first input.
loco::NodeShape visit(const luci::CircleConcatenation *node) final
// TODO Support when CircleConcatenation has 0 input
assert(node->numValues() > 0);
auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
auto axis = node->axis();
// negative axis counts from the back
axis += first_shape.rank();
assert(first_shape.rank() > static_cast<uint32_t>(axis));
loco::TensorShape output_shape;
// start from the first input's shape
output_shape.rank(first_shape.rank());
for (uint32_t i = 0; i < output_shape.rank(); ++i)
output_shape.dim(i) = first_shape.dim(i);
// accumulate the concat axis; validate the rest
for (uint32_t i = 1; i < node->numValues(); ++i)
auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
for (uint32_t j = 0; j < output_shape.rank(); ++j)
if (j == static_cast<uint32_t>(axis))
output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
assert(output_shape.dim(j) == input_shape.dim(j));
return loco::NodeShape{output_shape};
552 loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
// Conv2D: NHWC input convolved with an OHWI kernel; spatial dims follow the
// SAME/VALID formulas with dilation, channel dim becomes the kernel count.
loco::NodeShape visit(const luci::CircleConv2D *node) final
auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker("
<< ker_shape.rank() << ")" << std::endl;
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
// input channels must equal kernel input channels
assert(ifm_shape.dim(3) == ker_shape.dim(3));
uint32_t input_height = ifm_shape.dim(1).value();
uint32_t input_width = ifm_shape.dim(2).value();
uint32_t stride_height = node->stride()->h();
uint32_t stride_width = node->stride()->w();
uint32_t ker_height = ker_shape.dim(1).value();
uint32_t ker_width = ker_shape.dim(2).value();
uint32_t dilation_height = node->dilation()->h();
uint32_t dilation_width = node->dilation()->w();
// effective kernel size grows with dilation
uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
uint32_t output_height = 0;
uint32_t output_width = 0;
// VALID: floor((in - eff_ker) / stride) + 1
if (node->padding() == luci::Padding::VALID)
output_height = (input_height + stride_height - effective_ker_height) / stride_height;
output_width = (input_width + stride_width - effective_ker_width) / stride_width;
// SAME: ceil(in / stride)
else if (node->padding() == luci::Padding::SAME)
output_height = (input_height + stride_height - 1) / stride_height;
output_width = (input_width + stride_width - 1) / stride_width;
LUCI_ASSERT(false, "Wrong padding type");
loco::TensorShape ofm_shape;
ofm_shape.dim(0) = ifm_shape.dim(0);
ofm_shape.dim(1) = output_height;
ofm_shape.dim(2) = output_width;
// output channels = number of kernels (O of OHWI)
ofm_shape.dim(3) = ker_shape.dim(0);
return loco::NodeShape{ofm_shape};
605 loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
607 loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
// DepthToSpace: H and W grow by block_size, depth shrinks by block_size^2.
loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
// Only data format NHWC is supported
// TODO need to clarify what to do with layout in this operator
int32_t height = input_shape.dim(1).value();
int32_t width = input_shape.dim(2).value();
int32_t depth = input_shape.dim(3).value();
int block_size = node->block_size();
INTERNAL_EXN("Block size must be >= 2");
// depth must split evenly into block_size x block_size sub-blocks
if (depth % (block_size * block_size))
INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
loco::TensorShape output_shape;
output_shape.rank(4);
output_shape.dim(0) = input_shape.dim(0).value();
output_shape.dim(1) = height * block_size;
output_shape.dim(2) = width * block_size;
output_shape.dim(3) = depth / (block_size * block_size);
return loco::NodeShape{output_shape};
// DepthwiseConv2D: NHWC input, kernel laid out as [1, H, W, C*M]; spatial
// dims follow the SAME/VALID formulas, channels come from the kernel.
loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
assert(ifm_shape.rank() == 4);
assert(ker_shape.rank() == 4);
// depthwise kernel's leading dim is always 1
assert(ker_shape.dim(0).value() == 1);
uint32_t input_height = ifm_shape.dim(1).value();
uint32_t input_width = ifm_shape.dim(2).value();
uint32_t stride_height = node->stride()->h();
uint32_t stride_width = node->stride()->w();
uint32_t ker_height = ker_shape.dim(1).value();
uint32_t ker_width = ker_shape.dim(2).value();
uint32_t dilation_height = node->dilation()->h();
uint32_t dilation_width = node->dilation()->w();
uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
uint32_t output_height = 0;
uint32_t output_width = 0;
// VALID: floor((in - eff_ker) / stride) + 1
if (node->padding() == luci::Padding::VALID)
output_height = (input_height + stride_height - effective_ker_height) / stride_height;
output_width = (input_width + stride_width - effective_ker_width) / stride_width;
// SAME: ceil(in / stride)
else if (node->padding() == luci::Padding::SAME)
output_height = (input_height + stride_height - 1) / stride_height;
output_width = (input_width + stride_width - 1) / stride_width;
LUCI_ASSERT(false, "Wrong padding type");
loco::TensorShape ofm_shape;
ofm_shape.dim(0) = ifm_shape.dim(0);
ofm_shape.dim(1) = output_height;
ofm_shape.dim(2) = output_width;
// output channels = C * M, the kernel's last dim
ofm_shape.dim(3) = ker_shape.dim(3);
return loco::NodeShape{ofm_shape};
687 loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
689 loco::NodeShape visit(const luci::CircleElu *node) final
691 auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
693 return loco::NodeShape{input_shape};
696 loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
698 loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
// ExpandDims: insert a size-1 dimension at `axis` (negative axes allowed).
loco::NodeShape visit(const luci::CircleExpandDims *node) final
const loco::DataType S32 = loco::DataType::S32;
auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
if (x_shape.rank() == 0)
// This maybe for unknown shape. We use shape from the node itself.
return use_own(node);
// axis must be a constant scalar of int32
auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
if (const_axis->rank() != 0 && const_axis->rank() != 1)
INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
int32_t axis = const_axis->at<S32>(0);
LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
(axis >= -1 - static_cast<int32_t>(x_shape.rank())),
"Axis has to be between [-(D+1), D], where D is rank of input.");
// normalize negative axis: -1 maps to the new trailing position
size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
loco::TensorShape output_shape;
output_shape.rank(x_shape.rank() + 1);
// copy dims before the insertion point, splice in 1, then copy the rest
for (; i < positive_axis; i++)
output_shape.dim(i) = x_shape.dim(i);
output_shape.dim(i) = loco::Dimension(1);
for (; i < x_shape.rank(); i++)
output_shape.dim(i + 1) = x_shape.dim(i);
return loco::NodeShape{output_shape};
// Fill: output shape is given by the `dims` input when it is a constant;
// otherwise fall back to the node's own recorded shape.
loco::NodeShape visit(const luci::CircleFill *node) final
loco::TensorShape shape;
LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
if (dims_node != nullptr)
// Only support node with S32
LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
// dims must be a rank-1 list of dimension sizes
if (dims_node->rank() != 1)
INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
shape.rank(dims_node->dim(0).value());
for (uint32_t axis = 0; axis < shape.rank(); ++axis)
shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
// non-const dims: cannot infer, use recorded shape
shape = own_shape(node);
return loco::NodeShape{shape};
762 loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
764 loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
766 loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
// FullyConnected: input flattened to [batch, K] against weights [units, K]
// produces [batch, units], where batch = (product of input dims) / K.
loco::NodeShape visit(const luci::CircleFullyConnected *node) final
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
// Checking shape capability for fully connected layer
// Input: a tensor of at least rank 2 [D1, D2, ... Dn]
// Weight: [# of units, K]
// Output: [D1 * D2 * ... * Dn / K, # of units]
if (input_shape.rank() < 2 || weights_shape.rank() != 2)
// Return node own shape if shape inference is not possible
return use_own(node);
// flatten the input to a single element count
uint32_t input_size = 1;
for (uint32_t i = 0; i < input_shape.rank(); i++)
input_size = input_size * input_shape.dim(i).value();
// NOTE(review): assumes input_size is divisible by K — remainder is dropped
const uint32_t batch_size = input_size / weights_shape.dim(1).value();
loco::TensorShape out_shape;
out_shape.dim(0) = batch_size;
out_shape.dim(1) = weights_shape.dim(0);
return loco::NodeShape{out_shape};
// Gather: the `axis` dim of params is replaced by the full indices shape,
// so output rank = params rank - 1 + indices rank.
loco::NodeShape visit(const luci::CircleGather *node) final
loco::TensorShape output_shape;
const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
int32_t axis = node->axis();
// If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the
// shape that node already has.
if (input_shape.rank() == 0 || positions_shape.rank() == 0)
return use_own(node);
// negative axis counts from the back
axis += input_shape.rank();
output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
int32_t outdim_index = 0;
// params dims before axis, then all indices dims, then params dims after axis
for (int32_t i = 0; i < axis; ++i)
output_shape.dim(outdim_index++) = input_shape.dim(i);
for (uint32_t i = 0; i < positions_shape.rank(); ++i)
output_shape.dim(outdim_index++) = positions_shape.dim(i);
for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
output_shape.dim(outdim_index++) = input_shape.dim(i);
return loco::NodeShape{output_shape};
// GatherNd: output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
loco::NodeShape visit(const luci::CircleGatherNd *node) final
loco::TensorShape output_shape;
const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
const auto params_rank = params_shape.rank();
const auto indices_rank = indices_shape.rank();
// see https://www.tensorflow.org/api_docs/python/tf/gather_nd
// output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
// batch_dims isn't supported in tflite
// TODO: replace exceptions with setting shape to unknown?
// the last indices dim determines how many params dims are consumed
if (!indices_shape.dim(indices_rank - 1).known())
INTERNAL_EXN("Last indices dimension is unknown");
auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
if (indices_last_dim > params_rank)
INTERNAL_EXN("Last indices dimension should be <= params rank");
const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
output_shape.rank(output_rank);
uint32_t output_index = 0;
// leading output dims come from indices.shape[:-1]
for (uint32_t i = 0; i < indices_rank - 1; ++i)
auto &dim = indices_shape.dim(i);
INTERNAL_EXN("Unknown indices dimension is unsupported");
output_shape.dim(output_index++).set(dim.value());
// trailing output dims come from params.shape[indices_last_dim:]
for (uint32_t i = indices_last_dim; i < params_rank; ++i)
auto &dim = params_shape.dim(i);
INTERNAL_EXN("Unknown params dimension is unsupported");
output_shape.dim(output_index++).set(dim.value());
return loco::NodeShape{output_shape};
873 loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
875 loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
877 loco::NodeShape visit(const luci::CircleIf *node) final
879 // Shape of CircleIf is not used. Just use input 0
880 assert(node->input_count() > 0);
881 const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
882 return loco::NodeShape{input_shape};
885 loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
887 loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
889 return infer_pool_2d_shape(node);
892 loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
894 const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
895 return loco::NodeShape{input_shape};
898 loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
900 loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
902 loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
904 const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
905 return loco::NodeShape{input_shape};
908 loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
910 loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
912 loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
914 loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
916 loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
// MatrixSetDiag: output shape equals the input; validates that the diagonal
// shape matches input dims and the expected max-diagonal length.
loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
auto rank = diagonal_shape.rank();
// diagonal drops one rank relative to the input
LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
// leading dims of diagonal must equal the input's leading dims
for (uint32_t i = 0; i < rank - 1; i++)
LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
// max diagonal length = min of the input's last two dims
auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
return loco::NodeShape{input_shape};
939 loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
941 loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
943 loco::TensorShape output_shape;
945 auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
946 auto rank = diagonal_shape.rank();
948 output_shape.rank(rank + 1);
950 for (uint32_t i = 0; i < rank; i++)
952 output_shape.dim(i) = diagonal_shape.dim(i);
955 output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
957 return loco::NodeShape{output_shape};
960 loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
962 loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
964 return infer_pool_2d_shape(node);
967 loco::NodeShape visit(const luci::CircleMean *node) final
969 auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
970 return loco::NodeShape{output_shape};
973 loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
// MirrorPad: each dim grows by its [left, right] padding amounts taken from
// the constant paddings tensor of shape [rank, 2].
loco::NodeShape visit(const luci::CircleMirrorPad *node) final
const loco::DataType S32 = loco::DataType::S32;
auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
// TODO support non-const case
// TODO support other data type
LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
int32_t n = paddings->dim(0).value();
int32_t v = paddings->dim(1).value();
LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
LUCI_ASSERT(n == int32_t(input_shape.rank()),
"paddings [n, 2] should have same value of input rank");
loco::TensorShape output_shape;
output_shape.rank(input_shape.rank());
// each output dim = input dim + left pad + right pad
for (int32_t ni = 0; ni < n; ++ni)
int32_t idx = ni * 2;
int value = input_shape.dim(ni).value();
value += paddings->at<S32>(idx + 0); // left
value += paddings->at<S32>(idx + 1); // right
output_shape.dim(ni) = value;
return loco::NodeShape{output_shape};
  // Binary elementwise op: output is the broadcast of x and y shapes.
  loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
  {
    // Multi-output node: the shape recorded here mirrors 'boxes'; actual
    // per-output shapes are resolved by CircleNonMaxSuppressionV4Out.
    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
    return loco::NodeShape{boxes_shape};
  }
  // Binary comparison op: output is the broadcast of x and y shapes.
  loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
1021 loco::NodeShape visit(const luci::CircleOneHot *node) final
1023 const loco::DataType S32 = loco::DataType::S32;
1024 auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
1025 // Only support OneHot node's depth() is CircleConst with type S32
1026 // TODO support depth with other types
1027 auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
1028 LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
1029 if (depth->rank() != 0)
1030 INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
1031 loco::TensorShape output_shape;
1032 output_shape.rank(indices_shape.rank() + 1);
1033 auto axis = node->axis();
1035 axis += indices_shape.rank() + 1;
1036 LUCI_ASSERT(0 <= axis, "Axis is out of range");
1037 LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
1039 for (uint32_t i = 0; i < output_shape.rank(); i++)
1041 if (i == static_cast<uint32_t>(axis))
1043 output_shape.dim(i) = depth->at<S32>(0);
1047 output_shape.dim(i) = indices_shape.dim(j++);
1050 return loco::NodeShape{output_shape};
1053 loco::NodeShape visit(const luci::CirclePack *node) final
1055 LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
1057 auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
1058 // Make sure all inputs have the same shape.
1059 for (uint32_t i = 1; i < node->values_count(); ++i)
1061 auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
1062 LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
1063 "All inputs must have the same shape");
1066 // Checking shape capability for pack layer
1067 // Input: tensors [D1, D2, ... Dn]
1069 // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
1070 auto axis = node->axis();
1072 axis += first_shape.rank() + 1;
1074 LUCI_ASSERT(0 <= axis, "Axis is out of range");
1075 LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
1077 loco::TensorShape output_shape;
1078 output_shape.rank(first_shape.rank() + 1);
1081 for (uint32_t i = 0; i < output_shape.rank(); ++i)
1083 if (i == static_cast<uint32_t>(axis))
1085 output_shape.dim(i) = node->values_count();
1089 output_shape.dim(i) = first_shape.dim(j++);
1093 return loco::NodeShape{output_shape};
  loco::NodeShape visit(const luci::CirclePad *node) final
  {
    // Each output dim = input dim + left pad + right pad (same arithmetic as CircleMirrorPad).
    const loco::DataType S32 = loco::DataType::S32;

    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    // paddings must be a constant [n, 2] tensor for this inference to work
    auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());

    // TODO support non-const case
    // TODO support other data type
    LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
    LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")

    int32_t n = paddings->dim(0).value();
    int32_t v = paddings->dim(1).value();

    LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
    LUCI_ASSERT(n == int32_t(input_shape.rank()),
                "paddings [n, 2] should have same value of input rank");

    loco::TensorShape output_shape;

    output_shape.rank(input_shape.rank());
    for (int32_t ni = 0; ni < n; ++ni)
    {
      int32_t idx = ni * 2;
      int value = input_shape.dim(ni).value();
      value += paddings->at<S32>(idx + 0); // left
      value += paddings->at<S32>(idx + 1); // right
      output_shape.dim(ni) = value;
    }

    return loco::NodeShape{output_shape};
  }
  // Binary elementwise op: output is the broadcast of x and y shapes.
  loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
  loco::NodeShape visit(const luci::CirclePRelu *node) final
  {
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();

    // Output is the broadcast of input and alpha shapes
    auto output_shape = broadcast_shape(input_shape, alpha_shape);

    return loco::NodeShape{output_shape};
  }
1142 loco::NodeShape visit(const luci::CircleRange *node) final
1144 loco::TensorShape output_shape;
1145 output_shape.rank(1);
1147 auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
1148 auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
1149 auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
1151 if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
1153 return use_own(node);
1156 double start = 0, limit = 0, delta = 0;
1158 #define GET_RANGE_PARAM(DT) \
1159 start = start_node->scalar<DT>(); \
1160 limit = limit_node->scalar<DT>(); \
1161 delta = delta_node->scalar<DT>();
1163 switch (start_node->dtype())
1165 case loco::DataType::FLOAT32:
1166 GET_RANGE_PARAM(loco::DataType::FLOAT32)
1168 case loco::DataType::S32:
1169 GET_RANGE_PARAM(loco::DataType::S32)
1172 INTERNAL_EXN("Range data type not supported");
1175 #undef GET_RANGE_PARAM
1178 INTERNAL_EXN("Delta can not be zero");
1180 output_shape.dim(0) = ceil((limit - start) / delta);
1182 return loco::NodeShape{output_shape};
  loco::NodeShape visit(const luci::CircleRank *) final
  {
    // Rank always produces a scalar (rank-0 tensor).
    loco::TensorShape shape_output;
    shape_output.rank(0);

    return loco::NodeShape{shape_output};
  }
  loco::NodeShape visit(const luci::CircleReduceAny *node) final
  {
    // Reduction op: output shape derived from reduction_indices and keep_dims.
    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
    return loco::NodeShape{output_shape};
  }
  loco::NodeShape visit(const luci::CircleReduceMax *node) final
  {
    // Reduction op: output shape derived from reduction_indices and keep_dims.
    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
    return loco::NodeShape{output_shape};
  }
  loco::NodeShape visit(const luci::CircleReduceMin *node) final
  {
    // Reduction op: output shape derived from reduction_indices and keep_dims.
    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
    return loco::NodeShape{output_shape};
  }
  loco::NodeShape visit(const luci::CircleReduceProd *node) final
  {
    // Reduction op: output shape derived from reduction_indices and keep_dims.
    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
    return loco::NodeShape{output_shape};
  }
  loco::NodeShape visit(const luci::CircleRelu *node) final
  {
    // Activation: output shape equals the 'features' input shape.
    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();

    return loco::NodeShape{input_shape};
  }
  loco::NodeShape visit(const luci::CircleRelu6 *node) final
  {
    // Activation: output shape equals the 'features' input shape.
    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();

    return loco::NodeShape{input_shape};
  }
  loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
  {
    // Activation: output shape equals the 'features' input shape.
    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();

    return loco::NodeShape{input_shape};
  }
1239 * @note CircleReshape has new shape info in two places: 2nd input and attribute.
1240 * This shape inference uses shape from input 'shape' node when it's constant.
1241 * If not, shape will be from node itself. shape from attribute is not used.
1243 * TODO Change this policy when not appropriate
1245 loco::NodeShape visit(const luci::CircleReshape *node) final
1249 const loco::DataType S32 = loco::DataType::S32;
1251 loco::TensorShape shape_by_input;
1253 LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
1255 // Only support node's shape() is CircleConst with S32
1256 // TODO support other node with other types
1257 auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
1258 if (const_shape_node != nullptr)
1260 LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
1262 shape_by_input.rank(const_shape_node->size<S32>());
1264 for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
1266 shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
1271 // We use shape from the node itself
1272 shape_by_input = own_shape(node);
1276 loco::TensorShape shape_by_attr;
1278 shape_by_attr.rank(node->newShape()->rank());
1280 for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
1282 shape_by_attr.dim(axis) = node->newShape()->dim(axis);
1286 if (!(shape_by_input == shape_by_attr))
1288 INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
1289 INFO(l) << " shape_by_input : " << shape_by_input << std::endl;
1290 INFO(l) << " shape_by_attr : " << shape_by_attr << std::endl;
1293 loco::TensorShape output_shape = shape_by_input;
1295 // One of the dimensions can have special value -1, meaning its actual value should be inferred.
1296 const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
1297 const uint32_t input_element_count = loco::element_count(&input_shape);
1298 uint32_t output_element_count = 1;
1299 uint32_t unknown_dim_index = UINT32_MAX;
1300 for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
1302 const uint32_t dim_value = output_shape.dim(dim_index).value();
1303 if (static_cast<int>(dim_value) == -1)
1305 LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
1306 unknown_dim_index = dim_index;
1310 output_element_count *= dim_value;
1313 if (unknown_dim_index != UINT32_MAX)
1315 output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
1318 return loco::NodeShape{output_shape};
  loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
  {
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();

    if (input_shape.rank() != 4)
      INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");

    // 'size' must be a constant S32 pair [new_height, new_width]
    auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());

    if (const_node->dtype() != loco::DataType::S32)
      INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");

    if (const_node->rank() != 1)
      INTERNAL_EXN("Expected size tensor of rank 1");

    if (const_node->dim(0).value() != 2)
      INTERNAL_EXN("Expected size tensor with shape [2]");

    // NHWC: batch and channel dims are kept, H/W come from 'size'
    loco::TensorShape output_shape;
    output_shape.rank(4);
    output_shape.dim(0) = input_shape.dim(0);
    output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
    output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
    output_shape.dim(3) = input_shape.dim(3);

    return loco::NodeShape{output_shape};
  }
1349 loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
1351 auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
1353 if (input_shape.rank() != 4)
1354 INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
1356 auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
1358 if (const_node->dtype() != loco::DataType::S32)
1359 INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
1361 if (const_node->rank() != 1)
1362 INTERNAL_EXN("Expected size tensor of rank 1");
1364 if (const_node->dim(0).value() != 2)
1365 INTERNAL_EXN("Expected size tensor with shape [2]");
1367 loco::TensorShape output_shape;
1368 output_shape.rank(4);
1369 output_shape.dim(0) = input_shape.dim(0);
1370 output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
1371 output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
1372 output_shape.dim(3) = input_shape.dim(3);
1374 return loco::NodeShape{output_shape};
  loco::NodeShape visit(const luci::CircleReverseSequence *node) final
  {
    // Reversing along sequences does not change the shape.
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();

    return loco::NodeShape{input_shape};
  }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleRound *node) final { return use_x(node); }
  loco::NodeShape visit(const luci::CircleReverseV2 *node) final
  {
    auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();

    // 'axis' input must be a 1-D tensor
    LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
                "Tensor must be 1-D");

    // Reversing does not change the shape.
    return loco::NodeShape{input_shape};
  }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
1398 loco::NodeShape visit(const luci::CircleScatterNd *node) final
1400 loco::TensorShape output_shape;
1402 auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
1404 const loco::DataType S32 = loco::DataType::S32;
1405 const loco::DataType S64 = loco::DataType::S64;
1407 std::vector<int64_t> vect_shape;
1409 if (shape_node->dtype() == S32)
1410 vect_shape = vector_from_constant<S32>(shape_node);
1411 else if (shape_node->dtype() == S64)
1412 vect_shape = vector_from_constant<S64>(shape_node);
1414 LUCI_ASSERT(false, "Only support int32/int64 for shape()");
1416 output_shape.rank(vect_shape.size());
1417 for (uint32_t i = 0; i < vect_shape.size(); ++i)
1418 output_shape.dim(i) = vect_shape[i];
1420 return loco::NodeShape{output_shape};
  loco::NodeShape visit(const luci::CircleSegmentSum *node) final
  {
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();

    LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
    LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
                "segment_ids size must be equal to the size of data's first dimension");

    // segment_ids must be constant to compute the number of output segments
    auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());

    std::vector<int64_t> vect_ids;

    if (ids_shape_value->dtype() == loco::DataType::S32)
      vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);

    LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
                "segment_ids values should be sorted")

    loco::TensorShape output_shape;

    output_shape.rank(input_shape.rank());

    // All dimensions except the first follow the input
    for (uint32_t i = 1; i < input_shape.rank(); ++i)
      output_shape.dim(i) = input_shape.dim(i);

    // First dimension becomes the number of segments: max id + 1
    // NOTE(review): vect_ids stays empty when dtype is not S32, making back() UB —
    // presumably S32 is guaranteed by validation upstream; confirm.
    output_shape.dim(0) = vect_ids.back() + 1;

    return loco::NodeShape{output_shape};
  }
  loco::NodeShape visit(const luci::CircleSelect *node) final
  {
    // 't' and 'e' must agree; output takes their shape.
    auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
    assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());

    // condition shape validation
    auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
    if (c_shape.rank() != t_shape.rank())
    {
      // Rank mismatch is only allowed for scalar or 1-D condition
      if (c_shape.rank() != 0 && c_shape.rank() != 1)
        INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());

      if (c_shape.rank() == 1)
      {
        if (c_shape.dim(0).value() != t_shape.dim(0).value())
          INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
      }
    }

    return loco::NodeShape{t_shape};
  }
  loco::NodeShape visit(const luci::CircleSelectV2 *node) final
  {
    auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
    auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
    auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();

    // validate ability to broadcast shapes to each other
    auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
    return loco::NodeShape{b_shape};
  }
  loco::NodeShape visit(const luci::CircleShape *node) final
  {
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();

    // Shape produces a 1-D tensor with one element per input dimension.
    loco::TensorShape output_shape;

    output_shape.rank(1);
    output_shape.dim(0) = input_shape.rank();

    return loco::NodeShape{output_shape};
  }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
1501 loco::NodeShape visit(const luci::CircleSlice *node) final
1503 const loco::DataType S32 = loco::DataType::S32;
1504 const loco::DataType S64 = loco::DataType::S64;
1506 auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
1508 auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
1509 auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
1511 loco::TensorShape output_shape;
1512 std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
1513 std::vector<int64_t> vect_size;
1515 if (const_begin->dtype() == S32)
1516 vect_begin = vector_from_constant<S32>(const_begin);
1517 else if (const_begin->dtype() == S64)
1518 vect_begin = vector_from_constant<S64>(const_begin);
1520 LUCI_ASSERT(false, "Only support int32/int64 for begin()");
1522 if (const_size->dtype() == S32)
1523 vect_size = vector_from_constant<S32>(const_size);
1524 else if (const_size->dtype() == S64)
1525 vect_size = vector_from_constant<S64>(const_size);
1527 LUCI_ASSERT(false, "Only support int32/int64 for size()");
1529 assert(input_shape.rank() == vect_begin.size());
1530 assert(input_shape.rank() == vect_size.size());
1532 output_shape.rank(vect_begin.size());
1533 for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
1535 auto size = vect_size.at(idx);
1538 size = input_shape.dim(idx).value() - vect_begin.at(idx);
1540 output_shape.dim(idx) = size;
1543 return loco::NodeShape{output_shape};
  // Shape-preserving activation: output shape equals the 'logits' input shape.
  loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
1548 loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
1550 const loco::DataType S32 = loco::DataType::S32;
1552 auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
1553 // Support only input rank is 3 and 4
1554 assert(input_shape.rank() == 3 || input_shape.rank() == 4);
1556 // Only support block_shape() with S32 type CircleConst for now
1557 auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
1558 LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
1560 // Only support paddings() with S32 type CircleConst for now
1561 auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
1562 LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
1564 auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
1565 auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
1566 assert(const_block_shape_shape.rank() == 1);
1567 assert(const_paddings_shape.rank() == 2);
1569 int32_t input_spatial_dim = input_shape.rank() - 2;
1570 assert(const_block_shape_shape.dim(0) == input_spatial_dim);
1571 assert(const_paddings_shape.dim(0) == input_spatial_dim);
1572 assert(const_paddings_shape.dim(1) == 2);
1574 // Check all values of block_shape >= 1
1575 uint32_t ele_count = const_block_shape->size<S32>();
1576 for (uint32_t e = 0; e < ele_count; ++e)
1578 auto val = const_block_shape->at<S32>(e);
1581 INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
1585 loco::TensorShape shape_output;
1587 shape_output.rank(input_shape.rank());
1589 int32_t output_batch_size = input_shape.dim(0).value();
1590 for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
1592 int dim_size = input_shape.dim(dim + 1).value();
1593 dim_size += const_paddings->at<S32>(dim * 2);
1594 dim_size += const_paddings->at<S32>(dim * 2 + 1);
1595 shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
1597 assert(dim_size % const_block_shape->at<S32>(dim) == 0);
1598 output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
1600 shape_output.dim(0) = output_batch_size;
1601 shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
1603 return loco::NodeShape{shape_output};
1606 loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
1608 auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
1609 LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
1611 // Only data format NHWC is supported
1612 int32_t height = input_shape.dim(1).value();
1613 int32_t width = input_shape.dim(2).value();
1614 int32_t depth = input_shape.dim(3).value();
1616 int block_size = node->block_size();
1619 INTERNAL_EXN("Block size must be >= 2");
1621 if ((height % block_size) || (width % block_size))
1623 INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
1626 loco::TensorShape output_shape;
1627 output_shape.rank(4);
1629 output_shape.dim(0) = input_shape.dim(0).value();
1630 output_shape.dim(1) = height / block_size;
1631 output_shape.dim(2) = width / block_size;
1632 output_shape.dim(3) = block_size * block_size * depth;
1634 return loco::NodeShape{output_shape};
1637 loco::NodeShape visit(const luci::CircleSparseToDense *node) final
1639 loco::TensorShape shape;
1641 LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
1643 auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
1644 if (output_shape_node != nullptr)
1646 // Only support node with S32
1647 LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
1648 "Only support int32 CircleConst");
1650 if (output_shape_node->rank() != 1)
1651 INTERNAL_EXN_V("Only support rank 1 CircleConst",
1652 oops::to_uint32(output_shape_node->rank()));
1654 shape.rank(output_shape_node->dim(0).value());
1656 for (uint32_t axis = 0; axis < shape.rank(); ++axis)
1658 shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
1663 shape = own_shape(node);
1667 return loco::NodeShape{shape};
  loco::NodeShape visit(const luci::CircleSplit *node) final
  {
    // We'll set Split output as same as input so that SplitOut can handle it's own shape
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    return loco::NodeShape{input_shape};
  }
  loco::NodeShape visit(const luci::CircleSplitV *node) final
  {
    // We'll set SplitV output as same as input so that SplitOut can handle it's own shape
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    return loco::NodeShape{input_shape};
  }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleSqrt *node) final { return use_x(node); }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleSquare *node) final { return use_x(node); }
  loco::NodeShape visit(const luci::CircleSquaredDifference *node) final
  {
    // Binary elementwise op: output is the broadcast of x and y shapes.
    return broadcast_xy(node);
  }
  loco::NodeShape visit(const luci::CircleStridedSlice *node) final
  {
    auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
    auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
    auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());

    // Non-const begin/end/strides cannot be inferred statically: keep own shape
    if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
    {
      return use_own(node);
    }

    // Delegate to the shared StridedSlice inference (ShapeInfer_StridedSlice.h)
    loco::TensorShape shape = infer_output_shape(node);
    return loco::NodeShape{shape};
  }
1708 loco::NodeShape visit(const luci::CircleSqueeze *node) final
1710 auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
1712 // TODO input shape may be unknown before runtime
1713 std::vector<bool> do_squeeze(input_shape.rank(), false);
1714 uint32_t num_squeezed = 0;
1716 if (!node->squeeze_dims().empty())
1718 // SqueezeDims not empty, squeeze only dims specified
1719 for (int32_t raw_dim : node->squeeze_dims())
1721 int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
1723 if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
1724 input_shape.dim(dim).value() != 1)
1726 INTERNAL_EXN("invalid dimention specified to Squeeze");
1729 if (!do_squeeze[dim])
1731 do_squeeze[dim] = true;
1736 // SqueezeDims empty, squeeze any dims with size == 1
1737 for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
1739 if (input_shape.dim(dim) == 1)
1741 do_squeeze[dim] = true;
1747 loco::TensorShape output_shape;
1748 output_shape.rank(input_shape.rank() - num_squeezed);
1750 for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
1752 if (!do_squeeze[in_dim])
1754 output_shape.dim(out_dim++) = input_shape.dim(in_dim);
1758 return loco::NodeShape{output_shape};
  // Binary elementwise op: output is the broadcast of x and y shapes.
  loco::NodeShape visit(const luci::CircleSub *node) final { return broadcast_xy(node); }
  loco::NodeShape visit(const luci::CircleSum *node) final
  {
    // Reduction op: output shape derived from reduction_indices and keep_dims.
    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
    return loco::NodeShape{output_shape};
  }
  // Unary elementwise op: output shape equals the 'x' input shape.
  loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
  loco::NodeShape visit(const luci::CircleTile *node) final
  {
    const loco::DataType S32 = loco::DataType::S32;

    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    // multiples must be a constant 1-D S32 tensor
    auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());

    // TODO support non-const case
    // TODO support S64 type
    LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
    LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")

    uint32_t n = multiples->dim(0).value();

    LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");

    loco::TensorShape output_shape;

    output_shape.rank(input_shape.rank());
    for (uint32_t ni = 0; ni < n; ++ni)
    {
      // Each output dim is the input dim times its multiple
      int32_t multiple = multiples->at<S32>(ni);
      output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
    }

    return loco::NodeShape{output_shape};
  }
  loco::NodeShape visit(const luci::CircleTopKV2 *node) final
  {
    // set shape of this node as same as input
    // (per-output shapes are handled by the corresponding Out nodes)
    const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
    return loco::NodeShape{input_shape};
  }
1806 loco::NodeShape visit(const luci::CircleTranspose *node) final
1808 auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
1810 auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
1812 loco::TensorShape output_shape;
1813 output_shape.rank(input_shape.rank());
1815 assert(perm_node->dtype() == loco::DataType::S32);
1816 assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
1818 for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
1820 auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
1821 output_shape.dim(out_axis) = input_shape.dim(in_axis);
1824 return output_shape;
  loco::NodeShape visit(const luci::CircleUnique *node) final
  {
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();

    // Unique supports only 1-D input
    assert(input_shape.rank() == 1);

    // Output element count is data-dependent, so keep the node's own shape.
    loco::TensorShape shape_output;
    shape_output = own_shape(node);

    return loco::NodeShape{shape_output};
  }
1839 loco::NodeShape visit(const luci::CircleTransposeConv *node) final
1841 // TransposeConv's output shape is written in its 'inputSizes' argument
1842 auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
1843 // TODO support non-const type
1844 LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
1845 LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
1846 "Only support rank 1 with 4 entries")
1848 loco::TensorShape shape;
1851 for (uint32_t axis = 0; axis < 4; ++axis)
1852 shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
1854 return loco::NodeShape{shape};
1857 loco::NodeShape visit(const luci::CircleUnpack *node) final
1859 // CircleUnpack provides list(array) of Tensors which has one less dimension of the input
1860 // We'll set shape of CircleUnpack to shape of actual outputs
1861 // TODO fix this if any problem rises
1862 auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
1864 auto axis = node->axis();
1865 auto num = node->num();
1866 auto rank = static_cast<int32_t>(value_shape.rank());
1871 return use_own(node);
1874 LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
1879 LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
1880 "num, axis maybe incorrect");
1882 loco::TensorShape output_shape;
1883 output_shape.rank(rank - 1);
1885 for (int32_t i = 0, o = 0; i < rank; ++i)
1888 output_shape.dim(o++) = value_shape.dim(i);
1891 return loco::NodeShape{output_shape};
  // Output element count is data-dependent; keep the node's own shape.
  loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); }
  loco::NodeShape visit(const luci::CircleWhile *node) final
  {
    // Shape of CircleWhile is not used. Just use input 0
    // (per-output shapes come from the corresponding WhileOut nodes)
    assert(node->arity() > 0);
    const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
    return loco::NodeShape{input_shape};
  }
  loco::NodeShape visit(const luci::CircleZerosLike *node) final
  {
    // ZerosLike mirrors the input shape exactly.
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();

    return loco::NodeShape{input_shape};
  }
1912 loco::NodeShape visit(const luci::CircleBCQFullyConnected *node) final
1914 loco::TensorShape out_shape;
1916 auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
1917 auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
1919 LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
1921 int32_t qbits_sum = 0;
1922 for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
1924 qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
1928 out_shape.dim(0) = qbits_sum;
1929 out_shape.dim(1) = input_shape.dim(1);
1931 return loco::NodeShape{out_shape};
1934 loco::NodeShape visit(const luci::CircleBCQGather *node) final
1936 loco::TensorShape input_shape;
1937 loco::TensorShape output_shape;
1939 const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
1940 const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
1941 auto axis = node->axis();
1943 auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
1945 for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
1947 qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
1950 input_shape.rank(2);
1951 input_shape.dim(0) = qbits_sum;
1952 input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
1954 output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
1955 int32_t outdim_index = 0;
1956 for (int32_t i = 0; i < axis; ++i)
1957 output_shape.dim(outdim_index++) = input_shape.dim(i);
1958 for (uint32_t i = 0; i < indices_shape.rank(); ++i)
1959 output_shape.dim(outdim_index++) = indices_shape.dim(i);
1960 for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
1961 output_shape.dim(outdim_index++) = input_shape.dim(i);
1963 return loco::NodeShape{output_shape};
  loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
  {
    // Normalization is shape-preserving.
    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();

    return loco::NodeShape{input_shape};
  }
  loco::NodeShape visit(const luci::CircleInput *node) final
  {
    // Graph input: shape is taken from the node's own rank/dims.
    loco::TensorShape shape;

    shape.rank(node->rank());
    for (uint32_t axis = 0; axis < node->rank(); axis++)
      shape.dim(axis) = node->dim(axis);

    return loco::NodeShape{shape};
  }
  loco::NodeShape visit(const luci::CircleOutput *node) final
  {
    // Graph output: shape comes from the GraphOutput bound to this node's index.
    auto graph_outputs = node->graph()->outputs();
    auto graph_output = graph_outputs->at(node->index());
    auto output_shape = graph_output->shape();

    // NOTE(review): assumes graph_output->shape() is non-null — presumably set
    // during import; confirm before relying on it.
    return loco::NodeShape{*output_shape};
  }
  // No real producer: keep the node's own shape.
  loco::NodeShape visit(const luci::CircleOutputDummy *node) final { return use_own(node); }
  // No real producer: keep the node's own shape.
  loco::NodeShape visit(const luci::CircleOutputExclude *node) final { return use_own(node); }
  // Custom op output shape cannot be inferred: keep the node's own shape.
  loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
  loco::NodeShape visit(const luci::CircleIfOut *node) final
  {
    /**
     * @note  IF operator type and shape are that of the "then" and "else"
     *        Graph Outputs.
     */
    auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
    if (circle_if == nullptr)
    {
      INTERNAL_EXN("CircleIf IR is not configured correctly");
    }

    auto index = node->index();
    auto then_graph = circle_if->then_graph();
    auto else_graph = circle_if->else_graph();
    assert(then_graph != nullptr);
    assert(else_graph != nullptr);

    // shape and type are assumed to be same
    // these are checked at post_import_graph() in Import
    auto then_outputs = loco::output_nodes(then_graph);
    auto else_outputs = loco::output_nodes(else_graph);
    assert(then_outputs.size() == else_outputs.size());
    assert(index < static_cast<int32_t>(then_outputs.size()));

    auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
    auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));

    auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
    auto else_graph_outputs = else_graph->outputs();
    assert(then_graph_outputs->size() == else_graph_outputs->size());

    auto then_graph_output = then_graph_outputs->at(then_out->index());
    auto else_graph_output = else_graph_outputs->at(else_out->index());
    (void)else_graph_output; // make compiler happy for unused variable warnings
    assert(*then_graph_output->shape() == *else_graph_output->shape());

    // Either branch works: both graph outputs carry the same shape (asserted above)
    return loco::NodeShape{*then_graph_output->shape()};
  }
2040 loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
2042 const loco::DataType S32 = loco::DataType::S32;
2044 auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
2045 if (nmsv4 == nullptr)
2046 INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
2048 auto index = node->index();
2050 return loco::TensorShape({0});
2054 auto unknown = loco::TensorShape{loco::Dimension()};
2055 auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
2056 if (max_output_size == nullptr)
2057 return unknown; // we need CircleConst for max output size
2059 LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
2061 if (max_output_size->size<S32>() < 1)
2064 auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
2065 return loco::TensorShape{max_output_size_value};
2068 loco::NodeShape visit(const luci::CircleSplitOut *node) final
2070 const loco::DataType S32 = loco::DataType::S32;
2072 auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
2073 if (split == nullptr)
2074 INTERNAL_EXN("CircleSplit IR is not configured correctly");
2076 loco::NodeShape unknown;
2078 auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
2080 auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
2081 if (split_dim == nullptr)
2082 return unknown; // we need CircleConst for split_dim
2083 LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
2085 assert(split_dim->size<S32>() == 1);
2086 auto split_dim_axis = split_dim->at<S32>(0);
2087 if (split_dim_axis < 0)
2088 split_dim_axis += split_shape.rank();
2090 auto split_dim_value = split_shape.dim(split_dim_axis).value();
2091 assert(split_dim_value % split->num_split() == 0);
2092 const int split_depth = split_dim_value / split->num_split();
2094 loco::TensorShape output_shape = split_shape;
2096 // All shapes are equally same
2097 output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
2099 return loco::NodeShape{output_shape};
2102 loco::NodeShape visit(const luci::CircleSplitVOut *node) final
2104 const loco::DataType S32 = loco::DataType::S32;
2106 auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
2107 if (split == nullptr)
2108 INTERNAL_EXN("CircleSplit IR is not configured correctly");
2110 loco::NodeShape unknown;
2112 auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
2114 auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
2115 if (size_splits == nullptr)
2116 return unknown; // we need CircleConst for size_splits
2117 LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
2119 auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
2120 if (split_dim == nullptr)
2121 return unknown; // we need CircleConst for split_dim
2122 LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
2125 assert(split_dim->size<S32>() == 1);
2126 auto split_dim_axis = split_dim->at<S32>(0);
2127 if (split_dim_axis < 0)
2128 split_dim_axis += split_shape.rank();
2130 // interpret size_splits values
2131 int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
2132 assert(size_splits_count == split->num_split());
2134 int64_t minus_one_count = 0, size_splits_sum = 0;
2135 for (int32_t idx = 0; idx < size_splits_count; ++idx)
2137 auto size = size_splits->at<S32>(idx);
2142 size_splits_sum += size;
2144 if (minus_one_count > 1)
2145 INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
2147 // calcuate this SplitVOut shape
2148 auto input_size = split_shape.dim(split_dim_axis).value();
2149 assert(size_splits_sum <= input_size);
2151 auto index_this = node->index();
2152 assert(0 <= index_this && index_this < split->num_split());
2153 auto split_depth = size_splits->at<S32>(index_this);
2154 if (split_depth == -1)
2155 split_depth = input_size - size_splits_sum;
2157 loco::TensorShape output_shape = split_shape;
2159 output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
2161 return loco::NodeShape{output_shape};
2164 loco::NodeShape visit(const luci::CircleTopKV2Out *node) final
2166 const loco::DataType S32 = loco::DataType::S32;
2168 auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
2169 if (topkv2 == nullptr)
2170 INTERNAL_EXN("CircleSplit IR is not configured correctly");
2172 // shape of topkv2 is same as topkv2->input()
2173 auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
2175 auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
2176 LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
2177 assert(node_k->size<S32>() == 1);
2179 loco::TensorShape output_shape;
2181 output_shape.rank(input_shape.rank());
2182 for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
2184 output_shape.dim(idx) = input_shape.dim(idx);
2186 output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
2188 return loco::NodeShape{output_shape};
2191 loco::NodeShape visit(const luci::CircleUniqueOut *node) final
2193 auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
2194 if (unique == nullptr)
2196 INTERNAL_EXN("CircleUnique IR is not configured correctly");
2199 auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
2201 return loco::NodeShape{unique_shape};
2204 loco::NodeShape visit(const luci::CircleUnpackOut *node) final
2206 auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
2207 if (unpack == nullptr)
2209 INTERNAL_EXN("CircleUnpack IR is not configured correctly");
2212 auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
2214 return loco::NodeShape{unpack_shape};
2217 loco::NodeShape visit(const luci::CircleWhileOut *node) final
2220 * @note WHILE operator's shape is the same with the "cond"
2223 auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
2224 if (circle_while == nullptr)
2226 INTERNAL_EXN("CircleWhile IR is not configured correctly");
2229 auto index = node->index();
2230 auto cond_graph = circle_while->cond_graph();
2231 assert(cond_graph != nullptr);
2233 // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
2234 // loco::input_nodes
2235 auto cond_inputs = loco::input_nodes(cond_graph);
2236 auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
2238 auto cond_graph_inputs = cond_graph->inputs();
2239 auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
2241 auto cond_graph_input_shape = *cond_graph_input->shape();
2242 auto this_shape = own_shape(node);
2244 if (!(this_shape == cond_graph_input_shape))
2247 WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
2248 << " vs " << cond_graph_input_shape;
2251 return loco::NodeShape{this_shape};
2260 bool CircleShapeInferenceRule::recognize(const loco::Dialect *d) const
2262 return CircleDialect::get() == d;
2265 bool CircleShapeInferenceRule::infer(const loco::Node *node, loco::NodeShape &shape) const
2269 assert(node->dialect() == CircleDialect::get());
2271 ShapeInferenceAlgorithm alg;
2272 auto circle_node = loco::must_cast<const CircleNode *>(node);
2274 bool is_shape_undefined = (circle_node->shape_status() == ShapeStatus::UNDEFINED);
2275 bool is_shape_none = (circle_node->shape_status() == ShapeStatus::NOSHAPE);
2276 bool is_scalar = (circle_node->rank() == 0);
2278 if (is_shape_undefined)
2279 shape = circle_node->accept(&alg);
2282 if (is_shape_none || is_scalar)
2283 shape = own_shape(circle_node);
2285 shape = circle_node->accept(&alg);
2288 VERBOSE(l, 1) << "[luci] shape: " << circle_node->name();
2289 VERBOSE(l, 1) << " own_shape: " << own_shape(circle_node)
2290 << " -> infer: " << shape.as<loco::TensorShape>();