[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / KernelGenerator.cc
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"

#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
#include "ops/ElementwiseBinaryLayer.h"
#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
#include "ops/StatelessRandomUniformLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>
#include <memory>
#include <util/Utils.h>
#include <util/logging.h>
#include <exec/DynamicShapeInferer.h>

#include <stdexcept>

namespace onert
{
namespace backend
{
namespace cpu
{

namespace
{
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseActivation::Type::ELU:
      return ops::ElementwiseActivationType::kElu;
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return ops::ElementwiseActivationType::kLogistic;
    case ir::operation::ElementwiseActivation::Type::RELU:
      return ops::ElementwiseActivationType::kReLU;
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ops::ElementwiseActivationType::kTanh;
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ops::ElementwiseActivationType::kLeakyReLU;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
      return ops::ElementwiseBinaryType::kLogicalAnd;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
      return ops::ElementwiseBinaryType::kLogicalOr;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
      return ops::ElementwiseBinaryType::kMax;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
      return ops::ElementwiseBinaryType::kMin;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseUnary::Type::ABS:
      return ops::ElementwiseUnaryType::kAbs;
    case ir::operation::ElementwiseUnary::Type::CAST:
      return ops::ElementwiseUnaryType::kCast;
    case ir::operation::ElementwiseUnary::Type::COS:
      return ops::ElementwiseUnaryType::kCos;
    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
      return ops::ElementwiseUnaryType::kDequantize;
    case ir::operation::ElementwiseUnary::Type::ERF:
      return ops::ElementwiseUnaryType::kErf;
    case ir::operation::ElementwiseUnary::Type::EXP:
      return ops::ElementwiseUnaryType::kExp;
    case ir::operation::ElementwiseUnary::Type::FLOOR:
      return ops::ElementwiseUnaryType::kFloor;
    case ir::operation::ElementwiseUnary::Type::LOG:
      return ops::ElementwiseUnaryType::kLog;
    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
      return ops::ElementwiseUnaryType::kLogicalNot;
    case ir::operation::ElementwiseUnary::Type::NEG:
      return ops::ElementwiseUnaryType::kNeg;
    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
      return ops::ElementwiseUnaryType::kQuantize;
    case ir::operation::ElementwiseUnary::Type::ROUND:
      return ops::ElementwiseUnaryType::kRound;
    case ir::operation::ElementwiseUnary::Type::RSQRT:
      return ops::ElementwiseUnaryType::kRSqrt;
    case ir::operation::ElementwiseUnary::Type::SIN:
      return ops::ElementwiseUnaryType::kSin;
    case ir::operation::ElementwiseUnary::Type::SQRT:
      return ops::ElementwiseUnaryType::kSqrt;
    case ir::operation::ElementwiseUnary::Type::SQUARE:
      return ops::ElementwiseUnaryType::kSquare;
    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
      return ops::ElementwiseUnaryType::kZerosLike;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::ALL:
      return ops::ReduceType::kAll;
    case ir::operation::Reduce::ReduceType::ANY:
      return ops::ReduceType::kAny;
    case ir::operation::Reduce::ReduceType::MAX:
      return ops::ReduceType::kMax;
    case ir::operation::Reduce::ReduceType::MIN:
      return ops::ReduceType::kMin;
    case ir::operation::Reduce::ReduceType::PROD:
      return ops::ReduceType::kProd;
    case ir::operation::Reduce::ReduceType::SUM:
      return ops::ReduceType::kSum;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace

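// _current_layout is initialized to UNKNOWN here and is set from each op sequence's layout in
// visit(ir::OpSequence) before any per-operation visitor runs.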
KernelGenerator::KernelGenerator(
    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
    const std::shared_ptr<TensorBuilder> &tensor_builder,
    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
    const std::shared_ptr<ExternalContext> &external_context)
    : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
      _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
  // DO NOTHING
}

void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
  assert(!_return_fn_seq);
  assert(_tensor_builder->dynamicTensorManager());
  assert(_tensor_reg);

  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);

  _return_fn_seq = std::make_unique<exec::FunctionSequence>();

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op_seq = &op_seq;
    dyn_ctx->operations = &_operations_ctx;
    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();

    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
  }

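  // Visit every operation in the op sequence in order. Each visit() stores its kernel in
  // _return_fn, and releaseFunction() hands that kernel over to the function sequence.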
  _current_layout = op_seq.getLayout();
  for (const auto &operation_idx : op_seq.operations())
  {
    const auto &node = _operations_ctx.at(operation_idx);
    node.accept(*this);
    _return_fn_seq->append(releaseFunction());

    for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
    {
      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
      if (portable_tensor)
      {
        assert(portable_tensor->layout() == ir::Layout::NHWC);
      }

      auto tensor = _tensor_reg->getNativeTensor(ind);
      if (tensor)
      {
        tensor->increase_ref();
      }
    }
  }
}

void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto param_padding = node.param().padding;
  const auto dilation = node.param().dilation;
  auto fn = std::make_unique<ops::ConvolutionLayer>();

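  // When the input or kernel shape is still dynamic, explicit padding cannot be computed at
  // this point, so the layer is configured with the raw padding parameters, stride and
  // dilation only. Otherwise the explicit padding is precomputed below via
  // ir::calculatePadding. (For intuition only: SAME padding needs roughly
  // (out - 1) * stride + dilated_kernel_extent - in pixels in total per axis, split between
  // the two sides; the exact values are whatever calculatePadding returns.)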
  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                           dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

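  // The NN API form of BatchToSpaceND carries only two inputs (input and block size); when a
  // third input is present it holds the crops data, so it is forwarded to the layer as well.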
  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
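  // The bias is optional for FullyConnected; an undefined operand index maps to a null tensor.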
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share same kernel with reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto backend_layout = output_tensor->layout();
  UNUSED_RELEASE(backend_layout);

  // NOTE The frontend layout and the backend layout must be the same for this operation.
  //      If they differ, we would have to add a stage to permute the output tensor, which is
  //      not efficient even if it works. In that case it would be better to give these backend
  //      tensors the same layout in the first place.
  //      There is one more thing to consider: this operation depends on the layout of the
  //      model. For example, if an NHWC model uses this operation with output rank == 4,
  //      indices rank == 2 and axis == 2, the axis refers to W and C, but W and C are not
  //      adjacent dimensions in NCHW, so an NCHW backend cannot handle this case.
  assert(backend_layout == input_tensor->layout());
  assert(backend_layout == indices_tensor->layout());
  const auto &input_shape = _ctx.at(input_index).shape();
  UNUSED_RELEASE(input_shape);
  assert(input_shape.rank() < 4 || _current_layout == backend_layout);

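  // The axis may be given as a negative value, counted from the last dimension; normalize it
  // to a non-negative index (e.g. rank 3 with axis -1 becomes axis 2).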
  const auto axis_raw = node.param().axis;
  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Einsum &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto equation = node.param().equation;

  auto fn = std::make_unique<ops::EinsumLayer>();

  fn->configure(input_tensors, equation, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      // TODO make sure using `_current_layout` is correct for custom operations
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };

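  // Collect shape/type info and tensors for all inputs and outputs, then delegate kernel
  // creation to the user-registered kernel builder, keyed by the node id.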
661
662   fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
663   fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
664
665   params.userdata = node.userdata().data;
666   params.userdata_size = node.userdata().size;
667
668   auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
669
670   _return_fn = std::move(fn);
671 }
672
673 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
674 {
675   const auto output_index{node.getOutputs().at(0)};
676   const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
677
678   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
679   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
680
681   auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
682
683   fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
684                 convertElementwiseActivationType(node.param().op_type));
685
686   _return_fn = std::move(fn);
687 }
688
689 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
690 {
691   const auto output_index{node.getOutputs().at(0)};
692   const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
693   const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
694
695   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
696   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
697   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
698
699   auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
700
701   fn->configure(lhs_tensor, rhs_tensor, output_tensor,
702                 convertElementwiseBinaryType(node.param().op_type));
703
704   _return_fn = std::move(fn);
705 }
706
707 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
708 {
709   const auto output_index{node.getOutputs().at(0)};
710   const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
711
712   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
713   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
714
715   auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
716
717   fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
718
719   _return_fn = std::move(fn);
720 }
721
722 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
723 {
724   const auto output_index{node.getOutputs().at(0)};
725   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
726   // AXIS input is used for output shape inference
727
728   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
729   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
730
731   auto fn = std::make_unique<ops::ExpandDimsLayer>();
732
733   fn->configure(input_tensor, output_tensor);
734
735   _return_fn = std::move(fn);
736 }
737
738 void KernelGenerator::visit(const ir::operation::Pack &node)
739 {
740   const auto ofm_index{node.getOutputs().at(0)};
741
742   const auto rank = _ctx.at(ofm_index).shape().rank();
743   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
744
745   assert(-rank <= axis && axis < rank);
746
747   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
748
749   std::vector<const IPortableTensor *> input_tensors;
750   for (auto &ifm_idx : node.getInputs())
751     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
752
753   auto fn = std::make_unique<ops::PackLayer>();
754
755   fn->configure(input_tensors, axis, output_tensor);
756
757   _return_fn = std::move(fn);
758 }
759
760 void KernelGenerator::visit(const ir::operation::Unpack &node)
761 {
762   const auto input_index{node.getInputs().at(0)};
763
764   const auto rank = _ctx.at(input_index).shape().rank();
765   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
766
767   assert(rank == 0 || (-rank <= axis && axis < rank));
768
769   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
770
771   std::vector<IPortableTensor *> output_tensors;
772   for (auto &output_idx : node.getOutputs())
773     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
774
775   auto fn = std::make_unique<ops::UnpackLayer>();
776
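  // UnpackLayer expects a non-negative axis, so map a possibly negative axis into [0, rank).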
  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  assert(_ctx.at(pad_index).data());

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto output = _tensor_reg->getPortableTensor(output_index);
  auto pad_rank = _ctx.at(pad_index).shape().dim(0);
  auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());

  auto fn = std::make_unique<ops::PadLayer>();

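  // Pad has two inputs (input and paddings); PadV2 carries a third, constant input with the
  // value to pad with, which is forwarded to the layer below.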
  bool isPadV2 = node.getInputs().size() == 3;
  const void *value = nullptr;

  if (isPadV2)
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
  }

  fn->configure(input, output, pad_base, pad_rank, value);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

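  // MEAN has a dedicated kernel; every other reduce type goes through the generic ReduceLayer
  // with the corresponding ops::ReduceType.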
  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

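  // Three configurations are possible: the output size comes from the node parameters (single
  // input), from a constant SIZE tensor read here at generation time, or from a non-constant
  // SIZE tensor that the layer reads at run time.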
  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  const auto padding =
      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
  const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
  const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);

  auto fn = std::make_unique<ops::MatrixBandPartLayer>();

  fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  const auto scratch_buffer_index{
      node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

1386   // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1387   // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
1388   // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1389   // NOTE cell_to_input_weights exists only with peephole connections, even for a non-CIFG LSTM.
1390   bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1391                                     (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1392                                      _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1393   bool has_recurrent_to_input_weights =
1394       _ctx.exist(recurrent_to_input_weights_index) &&
1395       (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1396        _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1397
1398   // NOTE cell_to_forget_weights and cell_to_output_weights exist when peephole connections are used.
1399   // However, cell_to_input_weights does not exist in CIFG mode even with peephole connections.
1400   // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1401   // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1402   bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1403                                     _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1404   bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1405                                     _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1406
1407   bool has_input_gate_bias =
1408       _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
1409
1410   bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1411                                 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1412                                  _ctx.at(projection_weights_index).shape().dim(1) != 0);
1413   bool has_projection_bias =
1414       _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
1415
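  // Outputs other than OUTPUT are optional; when the graph does not define them, nullptr is
  // passed to the kernel instead.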
1416   auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1417                                    ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1418                                    : nullptr; // optional
1419   auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1420                                      ? _tensor_reg->getPortableTensor(output_state_out_index)
1421                                      : nullptr; // optional
1422   auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1423                                    ? _tensor_reg->getPortableTensor(cell_state_out_index)
1424                                    : nullptr; // optional
1425   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1426
1427   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1428
1429   auto input_to_input_weights_tensor =
1430       has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1431                                  : nullptr; // optional
1432   auto input_to_forget_weights_tensor =
1433       _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1434   auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1435   auto input_to_output_weights_tensor =
1436       _tensor_reg->getPortableTensor(input_to_output_weights_index);
1437   auto recurrent_to_input_weights_tensor =
1438       has_recurrent_to_input_weights
1439           ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1440           : nullptr; // optional
1441   auto recurrent_to_forget_weights_tensor =
1442       _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1443   auto recurrent_to_cell_weights_tensor =
1444       _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1445   auto recurrent_to_output_weights_tensor =
1446       _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1447
1448   auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1449   auto cell_to_forget_weights_tensor =
1450       has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1451                                  : nullptr; // optional
1452   auto cell_to_output_weights_tensor =
1453       has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1454                                  : nullptr; // optional
1455
1456   auto input_gate_bias_tensor =
1457       has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1458   auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1459   auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1460   auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1461   auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1462   auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1463
1464   auto projection_weights_tensor = has_projection_weights
1465                                        ? _tensor_reg->getPortableTensor(projection_weights_index)
1466                                        : nullptr; // optional
1467   auto projection_bias_tensor = has_projection_bias
1468                                     ? _tensor_reg->getPortableTensor(projection_bias_index)
1469                                     : nullptr; // optional
1470
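  // Layer-normalization weights are present only when the node carries 24 inputs; otherwise the
  // pointers below stay nullptr.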
1471   IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1472   IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1473   IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1474   IPortableTensor *output_layer_norm_weights_tensor = nullptr;
1475   if (node.getInputs().size() == 24)
1476   {
1477     const auto input_layer_norm_weights_index{
1478         node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1479     const auto forget_layer_norm_weights_index{
1480         node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1481     const auto cell_layer_norm_weights_index{
1482         node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1483     const auto output_layer_norm_weights_index{
1484         node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1485
1486     input_layer_norm_weights_tensor =
1487         _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1488     forget_layer_norm_weights_tensor =
1489         _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1490     cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1491     output_layer_norm_weights_tensor =
1492         _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1493   }
1494
1495   auto fn = std::make_unique<ops::LSTMLayer>();
1496
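  // Auxiliary input and weights are not used here, so nullptr is passed for them; the sequence is
  // processed forward with a zero output offset.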
1497   fn->configure(
1498       input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1499       input_to_cell_weights_tensor, input_to_output_weights_tensor,
1500       recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
1501       recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
1502       cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
1503       input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
1504       cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
1505       /*aux_input=*/nullptr,
1506       /*aux_input_to_input_weights=*/nullptr,
1507       /*aux_input_to_forget_weights=*/nullptr,
1508       /*aux_input_to_cell_weights=*/nullptr,
1509       /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1510       cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1511       projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1512       /*forward_sequence=*/true, time_major,
1513       /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1514       output_tensor,
1515       !_ctx.at(output_state_in_index).info().isVariable() /* non-variable means the frontend provides an empty buffer for now */,
1516       !_ctx.at(cell_state_in_index).info().isVariable());
1517
1518   _return_fn = std::move(fn);
1519 }
1520
1521 } // namespace cpu
1522 } // namespace backend
1523 } // namespace onert