Imported Upstream version 1.18.0
[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / KernelGenerator.cc
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "KernelGenerator.h"
18
19 #include "ops/AddNLayer.h"
20 #include "ops/ArgMinMaxLayer.h"
21 #include "ops/BatchToSpaceNDLayer.h"
22 #include "ops/BinaryArithmeticLayer.h"
23 #include "ops/CompareLayer.h"
24 #include "ops/ConcatLayer.h"
25 #include "ops/ConvolutionLayer.h"
26 #include "ops/DepthToSpaceLayer.h"
27 #include "ops/DepthwiseConvolutionLayer.h"
28 #include "ops/EinsumLayer.h"
29 #include "ops/ElementwiseActivationLayer.h"
30 #include "ops/ElementwiseBinaryLayer.h"
31 #include "ops/ElementwiseUnaryLayer.h"
32 #include "ops/ExpandDimsLayer.h"
33 #include "ops/FillLayer.h"
34 #include "ops/FullyConnectedLayer.h"
35 #include "ops/GatherLayer.h"
36 #include "ops/LSTMLayer.h"
37 #include "ops/MeanLayer.h"
38 #include "ops/DetectionPostProcessLayer.h"
39 #include "ops/OneHotLayer.h"
40 #include "ops/OperationUtils.h"
41 #include "ops/PackLayer.h"
42 #include "ops/PadLayer.h"
43 #include "ops/PoolLayer.h"
44 #include "ops/PowLayer.h"
45 #include "ops/QuantizeLayer.h"
46 #include "ops/RangeLayer.h"
47 #include "ops/RankLayer.h"
48 #include "ops/ReduceLayer.h"
49 #include "ops/ReshapeLayer.h"
50 #include "ops/ResizeBilinearLayer.h"
51 #include "ops/ReverseLayer.h"
52 #include "ops/SelectLayer.h"
53 #include "ops/ShapeLayer.h"
54 #include "ops/SliceLayer.h"
55 #include "ops/SoftMaxLayer.h"
56 #include "ops/StridedSliceLayer.h"
57 #include "ops/SpaceToBatchNDLayer.h"
58 #include "ops/SpaceToDepthLayer.h"
59 #include "ops/SplitLayer.h"
60 #include "ops/SplitVLayer.h"
61 #include "ops/TileLayer.h"
62 #include "ops/TransposeLayer.h"
63 #include "ops/UnpackLayer.h"
64 #include "ops/SquaredDiffLayer.h"
65 #include "ops/L2NormLayer.h"
66 #include "ops/MatrixBandPartLayer.h"
67 #include "ops/BatchMatMulLayer.h"
68 #include "ops/BroadcastToLayer.h"
69 #include "ops/FusedBatchNormLayer.h"
70 #include "ops/LogSoftMaxLayer.h"
71 #include "ops/StatelessRandomUniformLayer.h"
72
73 #include <backend/Backend.h>
74 #include <backend/IConfig.h>
75 #include <memory>
76 #include <util/Utils.h>
77 #include <util/logging.h>
78 #include <exec/DynamicShapeInferer.h>
79
80 #include <stdexcept>
81
82 namespace onert
83 {
84 namespace backend
85 {
86 namespace cpu
87 {
88
89 namespace
90 {
91 ops::ArithmeticType
92 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
93 {
94   switch (arithmetic_type_ir)
95   {
96     case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
97       return ops::ArithmeticType::kAdd;
98     case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
99       return ops::ArithmeticType::kSub;
100     case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
101       return ops::ArithmeticType::kMul;
102     case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
103       return ops::ArithmeticType::kDiv;
104     default:
105       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
106   }
107 }
108
109 ops::ElementwiseActivationType
110 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
111 {
112   switch (type_ir)
113   {
114     case ir::operation::ElementwiseActivation::Type::ELU:
115       return ops::ElementwiseActivationType::kElu;
116     case ir::operation::ElementwiseActivation::Type::LOGISTIC:
117       return ops::ElementwiseActivationType::kLogistic;
118     case ir::operation::ElementwiseActivation::Type::RELU:
119       return ops::ElementwiseActivationType::kReLU;
120     case ir::operation::ElementwiseActivation::Type::TANH:
121       return ops::ElementwiseActivationType::kTanh;
122     case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
123       return ops::ElementwiseActivationType::kLeakyReLU;
124     default:
125       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
126   }
127 }
128
129 ops::ElementwiseBinaryType
130 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
131 {
132   switch (type_ir)
133   {
134     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
135       return ops::ElementwiseBinaryType::kFloorDiv;
136     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
137       return ops::ElementwiseBinaryType::kLogicalAnd;
138     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
139       return ops::ElementwiseBinaryType::kLogicalOr;
140     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
141       return ops::ElementwiseBinaryType::kMax;
142     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
143       return ops::ElementwiseBinaryType::kMin;
144     default:
145       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
146   }
147 }
148
149 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
150 {
151   switch (type_ir)
152   {
153     case ir::operation::ElementwiseUnary::Type::ABS:
154       return ops::ElementwiseUnaryType::kAbs;
155     case ir::operation::ElementwiseUnary::Type::CAST:
156       return ops::ElementwiseUnaryType::kCast;
157     case ir::operation::ElementwiseUnary::Type::COS:
158       return ops::ElementwiseUnaryType::kCos;
159     case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
160       return ops::ElementwiseUnaryType::kDequantize;
161     case ir::operation::ElementwiseUnary::Type::ERF:
162       return ops::ElementwiseUnaryType::kErf;
163     case ir::operation::ElementwiseUnary::Type::EXP:
164       return ops::ElementwiseUnaryType::kExp;
165     case ir::operation::ElementwiseUnary::Type::FLOOR:
166       return ops::ElementwiseUnaryType::kFloor;
167     case ir::operation::ElementwiseUnary::Type::LOG:
168       return ops::ElementwiseUnaryType::kLog;
169     case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
170       return ops::ElementwiseUnaryType::kLogicalNot;
171     case ir::operation::ElementwiseUnary::Type::NEG:
172       return ops::ElementwiseUnaryType::kNeg;
173     case ir::operation::ElementwiseUnary::Type::QUANTIZE:
174       return ops::ElementwiseUnaryType::kQuantize;
175     case ir::operation::ElementwiseUnary::Type::ROUND:
176       return ops::ElementwiseUnaryType::kRound;
177     case ir::operation::ElementwiseUnary::Type::RSQRT:
178       return ops::ElementwiseUnaryType::kRSqrt;
179     case ir::operation::ElementwiseUnary::Type::SIN:
180       return ops::ElementwiseUnaryType::kSin;
181     case ir::operation::ElementwiseUnary::Type::SQRT:
182       return ops::ElementwiseUnaryType::kSqrt;
183     case ir::operation::ElementwiseUnary::Type::SQUARE:
184       return ops::ElementwiseUnaryType::kSquare;
185     case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
186       return ops::ElementwiseUnaryType::kZerosLike;
187     default:
188       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
189   }
190 }
191
192 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
193 {
194   switch (type_ir)
195   {
196     case ir::operation::Pool2D::PoolType::AVG:
197       return ops::PoolType::kAvg;
198     case ir::operation::Pool2D::PoolType::MAX:
199       return ops::PoolType::kMax;
200     default:
201       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
202   }
203 }
204
205 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
206 {
207   switch (reduce_type_ir)
208   {
209     case ir::operation::Reduce::ReduceType::ALL:
210       return ops::ReduceType::kAll;
211     case ir::operation::Reduce::ReduceType::ANY:
212       return ops::ReduceType::kAny;
213     case ir::operation::Reduce::ReduceType::MAX:
214       return ops::ReduceType::kMax;
215     case ir::operation::Reduce::ReduceType::MIN:
216       return ops::ReduceType::kMin;
217     case ir::operation::Reduce::ReduceType::PROD:
218       return ops::ReduceType::kProd;
219     case ir::operation::Reduce::ReduceType::SUM:
220       return ops::ReduceType::kSum;
221     default:
222       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
223   }
224 }
225 } // namespace
226
227 KernelGenerator::KernelGenerator(
228   const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
229   const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
230   const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
231   const std::shared_ptr<ExternalContext> &external_context)
232   : basic::KernelGeneratorBase{graph},
233     _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
234     _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
235     _external_context(external_context)
236 {
237   // DO NOTHING
238 }
239
240 std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
241 {
242   auto ret = std::make_unique<exec::FunctionSequence>();
243
244   assert(_tensor_builder->dynamicTensorManager());
245   assert(_tensor_reg);
246
247   auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
248
249   // Prepare to handle dynamic tensors later
250   auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
251   {
252     dyn_ctx->op_ind = ind;
253     dyn_ctx->operations = &_operations_ctx;
254     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
255
256     ret->dynamic_tensor_ctx(dyn_ctx);
257   }
258
259   auto &op = _graph.operations().at(ind);
260   op.accept(*this);
261   assert(_return_fn); // _return_fn must have been generated
262   ret->append(std::move(_return_fn));
263
264   for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
265   {
266     auto portable_tensor = _tensor_reg->getPortableTensor(ind);
267     if (portable_tensor)
268     {
269       assert(portable_tensor->layout() == ir::Layout::NHWC);
270     }
271
272     auto tensor = _tensor_reg->getNativeTensor(ind);
273     if (tensor)
274     {
275       tensor->increase_ref();
276     }
277   }
278   return ret;
279 }
280
281 void KernelGenerator::visit(const ir::operation::AddN &node)
282 {
283   const auto output_index{node.getOutputs().at(0)};
284
285   std::vector<const IPortableTensor *> input_tensors;
286   for (auto &input_idx : node.getInputs())
287     input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
288
289   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
290
291   auto fn = std::make_unique<ops::AddNLayer>();
292
293   fn->configure(std::move(input_tensors), output_tensor);
294
295   _return_fn = std::move(fn);
296 }
297
298 void KernelGenerator::visit(const ir::operation::Conv2D &node)
299 {
300   using ir::operation::Conv2D;
301
302   const auto ofm_index{node.getOutputs().at(0)};
303   const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
304   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
305   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
306
307   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
308   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
309   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
310   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
311
312   const auto stride = node.param().stride;
313   const auto activation = node.param().activation;
314   const auto param_padding = node.param().padding;
315   const auto dilation = node.param().dilation;
316   auto fn = std::make_unique<ops::ConvolutionLayer>();
317
318   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
319   {
320     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
321                   param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
322                   stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
323                   activation, ofm_tensor);
324
325     _return_fn = std::move(fn);
326     return;
327   }
328   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
329   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
330   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
331   const auto &ker_shape = _ctx.at(ker_index).shape();
332   const auto ker_height = ker_shape.dim(1);
333   const auto ker_width = ker_shape.dim(2);
334
335   const auto padding =
336     ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
337                          dilation.width_factor, dilation.height_factor);
338
339   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
340                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
341                 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
342
343   _return_fn = std::move(fn);
344 }
345
346 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
347 {
348   using ir::operation::DepthwiseConv2D;
349
350   const auto ofm_index{node.getOutputs().at(0)};
351   const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
352   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
353   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
354
355   const auto stride = node.param().stride;
356   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
357   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
358   // Kernel format is [1, kernel_height, kernel_width, depth_out].
359   const auto &ker_shape = _ctx.at(ker_index).shape();
360   const auto ker_height = ker_shape.dim(1);
361   const auto ker_width = ker_shape.dim(2);
362   const auto dilation_width = node.param().dilation.width_factor;
363   const auto dilation_height = node.param().dilation.height_factor;
364   const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
365                                             ker_width, ker_height, dilation_width, dilation_height);
366   const auto multiplier = node.param().multiplier;
367   const auto activation = node.param().activation;
368
369   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
370   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
371   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
372   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
373
374   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
375
376   fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
377                 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
378                 dilation_height, activation, ofm_tensor, _external_context);
379
380   _return_fn = std::move(fn);
381 }
382
383 void KernelGenerator::visit(const ir::operation::Concat &node)
384 {
385   const auto ofm_index{node.getOutputs().at(0)};
386
387   const auto rank = _ctx.at(ofm_index).shape().rank();
388   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
389
390   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
391
392   std::vector<const IPortableTensor *> input_tensors;
393   for (auto &ifm_idx : node.getInputs())
394     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
395
396   auto fn = std::make_unique<ops::ConcatLayer>();
397
398   fn->configure(input_tensors, axis, output_tensor);
399
400   _return_fn = std::move(fn);
401 }
402
403 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
404 {
405   const auto output_index{node.getOutputs().at(0)};
406   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
407   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
408
409   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
410   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
411   auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
412
413   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
414
415   IPortableTensor *crops_alloc = nullptr;
416   const auto NNApiInputs = 2;
417
418   if (node.getInputs().size() != NNApiInputs)
419   {
420     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
421     crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
422   }
423
424   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
425
426   _return_fn = std::move(fn);
427 }
428
429 void KernelGenerator::visit(const ir::operation::Fill &node)
430 {
431   const auto output_index{node.getOutputs().at(0)};
432   // SHAPE input is used for shape inference
433   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
434
435   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
436   auto value_tensor = _tensor_reg->getPortableTensor(value_index);
437
438   auto fn = std::make_unique<ops::FillLayer>();
439
440   fn->configure(value_tensor, output_tensor);
441
442   _return_fn = std::move(fn);
443 }
444
445 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
446 {
447   using ir::operation::FullyConnected;
448
449   const auto output_index{node.getOutputs().at(0)};
450   const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
451   const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
452   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
453   const auto activation = node.param().activation;
454   const auto weights_format = node.param().weights_format;
455
456   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
457   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
458   auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
459   auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
460
461   auto fn = std::make_unique<ops::FullyConnectedLayer>();
462
463   fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
464                 _external_context);
465
466   _return_fn = std::move(fn);
467 }
468
469 void KernelGenerator::visit(const ir::operation::Reshape &node)
470 {
471   const auto output_index{node.getOutputs().at(0)};
472   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
473
474   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
475   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
476
477   // optional 2nd input
478   IPortableTensor *shape_tensor = nullptr;
479
480   if (node.getInputs().size() == 2)
481   {
482     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
483     shape_tensor = _tensor_reg->getPortableTensor(shape_index);
484   }
485
486   auto fn = std::make_unique<ops::ReshapeLayer>();
487
488   fn->configure(input_tensor, shape_tensor, output_tensor);
489   _return_fn = std::move(fn);
490 }
491
492 void KernelGenerator::visit(const ir::operation::Squeeze &node)
493 {
494   const auto output_index{node.getOutputs().at(0)};
495   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
496
497   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
498   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
499
500   // Squeeze can share same kernel with reshape
501   auto fn = std::make_unique<ops::ReshapeLayer>();
502
503   fn->configure(input_tensor, nullptr, output_tensor);
504
505   _return_fn = std::move(fn);
506 }
507
508 void KernelGenerator::visit(const ir::operation::Softmax &node)
509 {
510   const auto output_index{node.getOutputs().at(0)};
511   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
512
513   const auto beta = node.param().beta;
514
515   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
516   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
517
518   auto fn = std::make_unique<ops::SoftMaxLayer>();
519
520   fn->configure(input_tensor, beta, output_tensor);
521
522   _return_fn = std::move(fn);
523 }
524
525 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
526 {
527   const auto ofm_index{node.getOutputs().at(0)};
528   const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
529   const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
530
531   const auto activation = node.param().activation;
532
533   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
534   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
535   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
536
537   auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
538
539   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
540                 convertArithmeticType(node.param().arithmetic_type));
541
542   _return_fn = std::move(fn);
543 }
544
545 void KernelGenerator::visit(const ir::operation::Comparison &node)
546 {
547   const auto ofm_index{node.getOutputs().at(0)};
548   const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
549   const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
550
551   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
552   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
553   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
554
555   auto comparison_type = node.param().comparison_type;
556
557   auto fn = std::make_unique<ops::CompareLayer>();
558
559   fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
560
561   _return_fn = std::move(fn);
562 }
563
564 void KernelGenerator::visit(const ir::operation::Gather &node)
565 {
566   const auto output_index{node.getOutputs().at(0)};
567   const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
568   const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
569
570   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
571   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
572   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
573
574   const auto backend_layout = output_tensor->layout();
575   UNUSED_RELEASE(backend_layout);
576
577   // NOTE The frontend layout and backend layout must be the same for this operation.
578   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
579   //      is not not efficient even if it works well. If so, it would be better to set the
580   //      layout of these backend tensors to the same layout.
581   //      There is also one thing we have to think about. This operation depends on the layout of
582   //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
583   //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
584   //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
585   assert(backend_layout == input_tensor->layout());
586   assert(backend_layout == indices_tensor->layout());
587   const auto &input_shape = _ctx.at(input_index).shape();
588   UNUSED_RELEASE(input_shape);
589   assert(input_shape.rank() < 4 || _current_layout == backend_layout);
590
591   const auto axis_raw = node.param().axis;
592   const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
593
594   auto fn = std::make_unique<ops::GatherLayer>();
595
596   fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
597
598   _return_fn = std::move(fn);
599 }
600
601 void KernelGenerator::visit(const ir::operation::OneHot &node)
602 {
603   const auto output_index{node.getOutputs().at(0)};
604   const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
605   const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
606   const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
607   const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
608
609   const auto axis = node.param().axis;
610
611   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
612   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
613   auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
614   auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
615   auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
616
617   assert(indices_tensor->data_type() == OperandType::INT32);
618   assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));
619
620   auto fn = std::make_unique<ops::OneHotLayer>();
621
622   fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
623
624   _return_fn = std::move(fn);
625 }
626
627 void KernelGenerator::visit(const ir::operation::Einsum &node)
628 {
629   const auto ofm_index{node.getOutputs().at(0)};
630
631   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
632   std::vector<const IPortableTensor *> input_tensors;
633   for (auto &ifm_idx : node.getInputs())
634     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
635
636   const auto equation = node.param().equation;
637
638   auto fn = std::make_unique<ops::EinsumLayer>();
639
640   fn->configure(input_tensors, equation, output_tensor);
641
642   _return_fn = std::move(fn);
643 }
644
645 void KernelGenerator::visit(const ir::operation::Custom &node)
646 {
647   auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
648                           std::vector<custom::TypeInfo> &types,
649                           std::vector<IPortableTensor *> &tensors) {
650     for (auto &idx : opSeq)
651     {
652       const auto &operand = _ctx.at(idx);
653       // TODO make sure using `_current_layout` is correct for custom operations
654       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
655       auto in_tensor = _tensor_reg->getPortableTensor(idx);
656       tensors.emplace_back(in_tensor);
657     }
658   };
659
660   backend::custom::CustomKernelConfigParams params{};
661
662   fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
663   fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
664
665   params.userdata = node.userdata().data;
666   params.userdata_size = node.userdata().size;
667
668   auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
669
670   _return_fn = std::move(fn);
671 }
672
673 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
674 {
675   const auto output_index{node.getOutputs().at(0)};
676   const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
677
678   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
679   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
680
681   auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
682
683   fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
684                 convertElementwiseActivationType(node.param().op_type));
685
686   _return_fn = std::move(fn);
687 }
688
689 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
690 {
691   const auto output_index{node.getOutputs().at(0)};
692   const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
693   const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
694
695   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
696   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
697   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
698
699   auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
700
701   fn->configure(lhs_tensor, rhs_tensor, output_tensor,
702                 convertElementwiseBinaryType(node.param().op_type));
703
704   _return_fn = std::move(fn);
705 }
706
707 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
708 {
709   const auto output_index{node.getOutputs().at(0)};
710   const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
711
712   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
713   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
714
715   if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
716   {
717     auto fn = std::make_unique<ops::QuantizeLayer>();
718     fn->configure(input_tensor, output_tensor);
719     _return_fn = std::move(fn);
720   }
721   else
722   {
723     auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
724     fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
725     _return_fn = std::move(fn);
726   }
727 }
728
729 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
730 {
731   const auto output_index{node.getOutputs().at(0)};
732   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
733   // AXIS input is used for output shape inference
734
735   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
736   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
737
738   auto fn = std::make_unique<ops::ExpandDimsLayer>();
739
740   fn->configure(input_tensor, output_tensor);
741
742   _return_fn = std::move(fn);
743 }
744
745 void KernelGenerator::visit(const ir::operation::Pack &node)
746 {
747   const auto ofm_index{node.getOutputs().at(0)};
748
749   const auto rank = _ctx.at(ofm_index).shape().rank();
750   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
751
752   assert(-rank <= axis && axis < rank);
753
754   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
755
756   std::vector<const IPortableTensor *> input_tensors;
757   for (auto &ifm_idx : node.getInputs())
758     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
759
760   auto fn = std::make_unique<ops::PackLayer>();
761
762   fn->configure(input_tensors, axis, output_tensor);
763
764   _return_fn = std::move(fn);
765 }
766
767 void KernelGenerator::visit(const ir::operation::Unpack &node)
768 {
769   const auto input_index{node.getInputs().at(0)};
770
771   const auto rank = _ctx.at(input_index).shape().rank();
772   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
773
774   assert(rank == 0 || (-rank <= axis && axis < rank));
775
776   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
777
778   std::vector<IPortableTensor *> output_tensors;
779   for (auto &output_idx : node.getOutputs())
780     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
781
782   auto fn = std::make_unique<ops::UnpackLayer>();
783
784   uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
785
786   fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
787
788   _return_fn = std::move(fn);
789 }
790
791 void KernelGenerator::visit(const ir::operation::Pad &node)
792 {
793   const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
794   const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
795   const auto output_index{node.getOutputs().at(0)};
796   assert(_ctx.at(pad_index).data());
797
798   auto input = _tensor_reg->getPortableTensor(input_index);
799   auto output = _tensor_reg->getPortableTensor(output_index);
800   auto pad_rank = _ctx.at(pad_index).shape().dim(0);
801   auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
802
803   auto fn = std::make_unique<ops::PadLayer>();
804
805   bool isPadV2 = node.getInputs().size() == 3 ? true : false;
806   const void *value = nullptr;
807
808   if (isPadV2)
809   {
810     const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
811     value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
812   }
813
814   fn->configure(input, output, pad_base, pad_rank, value);
815   _return_fn = std::move(fn);
816 }
817
818 void KernelGenerator::visit(const ir::operation::Transpose &node)
819 {
820   const auto output_index{node.getOutputs().at(0)};
821   const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
822   const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
823
824   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
825   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
826   auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
827
828   auto fn = std::make_unique<ops::TransposeLayer>();
829
830   fn->configure(input_tensor, perm_tensor, output_tensor);
831
832   _return_fn = std::move(fn);
833 }
834
835 void KernelGenerator::visit(const ir::operation::Reduce &node)
836 {
837   const auto output_index{node.getOutputs().at(0)};
838   const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
839   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
840
841   const auto keep_dims = node.param().keep_dims;
842   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
843   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
844   auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
845
846   if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
847   {
848     auto fn = std::make_unique<ops::MeanLayer>();
849
850     fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
851
852     _return_fn = std::move(fn);
853   }
854   else
855   {
856     auto fn = std::make_unique<ops::ReduceLayer>();
857
858     const auto reduce_type = convertReduceType(node.param().reduce_type);
859     fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
860
861     _return_fn = std::move(fn);
862   }
863 }
864
865 void KernelGenerator::visit(const ir::operation::Select &node)
866 {
867   const auto output_index{node.getOutputs().at(0)};
868   const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
869   const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
870   const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
871
872   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
873   auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
874   auto true_tensor = _tensor_reg->getPortableTensor(true_index);
875   auto false_tensor = _tensor_reg->getPortableTensor(false_index);
876
877   auto fn = std::make_unique<ops::SelectLayer>();
878
879   fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
880
881   _return_fn = std::move(fn);
882 }
883
884 void KernelGenerator::visit(const ir::operation::Slice &node)
885 {
886   const auto output_index{node.getOutputs().at(0)};
887   const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
888   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
889   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
890
891   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
892   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
893   auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
894   auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
895
896   auto fn = std::make_unique<ops::SliceLayer>();
897
898   fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
899
900   _return_fn = std::move(fn);
901 }
902
903 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
904 {
905   const auto output_index{node.getOutputs().at(0)};
906   const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
907   const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
908   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
909   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
910
911   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
912   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
913   auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
914   auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
915   auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
916
917   auto begin_mask = node.param().begin_mask;
918   auto end_mask = node.param().end_mask;
919   auto shrink_axis_mask = node.param().shrink_axis_mask;
920
921   auto fn = std::make_unique<ops::StridedSliceLayer>();
922
923   fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
924                 end_mask, shrink_axis_mask);
925
926   _return_fn = std::move(fn);
927 }
928
929 void KernelGenerator::visit(const ir::operation::Split &node)
930 {
931   const auto num_splits = node.param().num_splits;
932   assert(num_splits == static_cast<int>(node.getOutputs().size()));
933
934   const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
935   const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
936
937   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
938   auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
939
940   std::vector<IPortableTensor *> out_tensors;
941   for (auto &output_idx : node.getOutputs())
942     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
943
944   auto fn = std::make_unique<ops::SplitLayer>();
945
946   fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
947
948   _return_fn = std::move(fn);
949 }
950
951 void KernelGenerator::visit(const ir::operation::Shape &node)
952 {
953   const auto ofm_index{node.getOutputs().at(0)};
954   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
955
956   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
957   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
958
959   auto fn = std::make_unique<ops::ShapeLayer>();
960
961   fn->configure(ifm_tensor, ofm_tensor);
962
963   _return_fn = std::move(fn);
964 }
965
966 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
967 {
968   const auto output_index{node.getOutputs().at(0)};
969   const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
970
971   auto align_corners = node.param().align_corners;
972   auto half_pixel_centers = node.param().half_pixel_centers;
973
974   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
975   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
976
977   auto fn = std::make_unique<ops::ResizeBilinearLayer>();
978
979   if (node.getInputs().size() == 1)
980   {
981     fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
982                   align_corners, half_pixel_centers);
983   }
984   else
985   {
986     assert(node.getInputs().size() == 2);
987     const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
988     auto size_tensor = _tensor_reg->getPortableTensor(size_index);
989     if (size_tensor->is_constant())
990     {
991       auto size_vec = _ctx.at(size_index).asVector<int32_t>();
992       const auto height_out = size_vec[0];
993       const auto width_out = size_vec[1];
994       fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
995                     half_pixel_centers);
996     }
997     else
998     {
999       fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
1000     }
1001   }
1002
1003   _return_fn = std::move(fn);
1004 }
1005
1006 void KernelGenerator::visit(const ir::operation::Reverse &node)
1007 {
1008   const auto output_index{node.getOutputs().at(0)};
1009   const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
1010   const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
1011
1012   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1013   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1014   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1015
1016   auto fn = std::make_unique<ops::ReverseLayer>();
1017
1018   fn->configure(input_tensor, axis_tensor, output_tensor);
1019
1020   _return_fn = std::move(fn);
1021 }
1022
1023 void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
1024 {
1025   const auto output_index{node.getOutputs().at(0)};
1026   const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
1027   const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
1028
1029   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1030   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1031   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1032
1033   auto fn = std::make_unique<ops::ArgMinMaxLayer>();
1034
1035   fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
1036
1037   _return_fn = std::move(fn);
1038 }
1039
1040 void KernelGenerator::visit(const ir::operation::Pool2D &node)
1041 {
1042   const auto ofm_index{node.getOutputs().at(0)};
1043   const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
1044
1045   const auto kh = node.param().kh;
1046   const auto kw = node.param().kw;
1047   const auto stride = node.param().stride;
1048   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
1049   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
1050   const auto padding =
1051     ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
1052   const auto activation = node.param().activation;
1053
1054   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1055   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1056
1057   auto fn = std::make_unique<ops::PoolLayer>();
1058
1059   fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1060                 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1061                 convertPoolType(node.param().op_type));
1062
1063   _return_fn = std::move(fn);
1064 }
1065
1066 void KernelGenerator::visit(const ir::operation::Pow &node)
1067 {
1068   const auto output_index{node.getOutputs().at(0)};
1069   const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1070   const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1071
1072   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1073   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1074   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1075
1076   auto fn = std::make_unique<ops::PowLayer>();
1077
1078   fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1079
1080   _return_fn = std::move(fn);
1081 }
1082
1083 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1084 {
1085   const auto output_index{node.getOutputs().at(0)};
1086   const auto input_index{node.getInputs().at(0)};
1087
1088   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1089   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
1090
1091   auto fn = std::make_unique<ops::L2NormLayer>();
1092
1093   fn->configure(input_alloc, output_alloc);
1094
1095   _return_fn = std::move(fn);
1096 }
1097
1098 void KernelGenerator::visit(const ir::operation::Range &node)
1099 {
1100   const auto output_index{node.getOutputs().at(0)};
1101   const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1102   const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1103   const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1104
1105   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1106   auto start_tensor = _tensor_reg->getPortableTensor(start_index);
1107   auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
1108   auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
1109
1110   auto fn = std::make_unique<ops::RangeLayer>();
1111
1112   fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1113   _return_fn = std::move(fn);
1114 }
1115
1116 void KernelGenerator::visit(const ir::operation::Rank &node)
1117 {
1118   const auto ofm_index{node.getOutputs().at(0)};
1119   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1120
1121   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1122   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1123
1124   auto fn = std::make_unique<ops::RankLayer>();
1125
1126   fn->configure(ifm_tensor, ofm_tensor);
1127
1128   _return_fn = std::move(fn);
1129 }
1130
1131 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1132 {
1133   const auto ofm_index{node.getOutputs().at(0)};
1134   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1135   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1136
1137   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1138   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1139   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1140
1141   auto fn = std::make_unique<ops::SqDiffLayer>();
1142
1143   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1144   _return_fn = std::move(fn);
1145 }
1146
1147 void KernelGenerator::visit(const ir::operation::Tile &node)
1148 {
1149   const auto output_index{node.getOutputs().at(0)};
1150   const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1151   const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1152
1153   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1154   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1155   auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
1156
1157   auto fn = std::make_unique<ops::TileLayer>();
1158
1159   fn->configure(input_tensor, multiples_tensor, output_tensor);
1160   _return_fn = std::move(fn);
1161 }
1162
1163 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1164 {
1165   const auto output_index{node.getOutputs().at(0)};
1166   const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1167   const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1168   const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1169
1170   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1171   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1172   auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
1173   auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
1174
1175   auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1176
1177   fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1178   _return_fn = std::move(fn);
1179 }
1180
1181 void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
1182 {
1183   using NMS = ir::operation::DetectionPostProcess;
1184
1185   ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
1186   parameters.scales.y = node.param().scale.y_scale;
1187   parameters.scales.x = node.param().scale.x_scale;
1188   parameters.scales.w = node.param().scale.w_scale;
1189   parameters.scales.h = node.param().scale.h_scale;
1190
1191   parameters.iou_threshold = node.param().iou_threshold;
1192   parameters.score_threshold = node.param().score_threshold;
1193   parameters.max_boxes_per_class = node.param().max_boxes_per_class;
1194   parameters.max_detections = node.param().max_detections;
1195   parameters.num_classes = node.param().num_classes;
1196   parameters.center_box_format = node.param().center_size_boxes;
1197   parameters.max_classes_per_detection = node.param().max_classes_per_detection;
1198
1199   auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
1200   auto scores_index = node.getInputs().at(NMS::Input::SCORES);
1201   auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);
1202
1203   auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
1204   auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
1205   auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
1206   auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);
1207
1208   parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
1209   parameters.scrores_descr = _ctx.at(scores_index).shape().dims();
1210
1211   parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
1212   parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
1213   parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);
1214
1215   parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
1216   parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
1217   parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
1218   parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);
1219
1220   auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
1221   fn->configure(std::move(parameters));
1222
1223   _return_fn = std::move(fn);
1224 }
1225
1226 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1227 {
1228   const auto output_index{node.getOutputs().at(0)};
1229   const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1230   const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1231
1232   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1233   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1234   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1235
1236   const auto adj_x = node.param().adj_x;
1237   const auto adj_y = node.param().adj_y;
1238
1239   auto fn = std::make_unique<ops::BatchMatMulLayer>();
1240
1241   fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1242   _return_fn = std::move(fn);
1243 }
1244
1245 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1246 {
1247   const auto output_index{node.getOutputs().at(0)};
1248   const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1249   const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1250
1251   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1252   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1253   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1254
1255   auto fn = std::make_unique<ops::BroadcastToLayer>();
1256
1257   fn->configure(input_tensor, shape_tensor, output_tensor);
1258
1259   _return_fn = std::move(fn);
1260 }
1261
1262 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1263 {
1264   const auto ofm_index{node.getOutputs().at(0)};
1265
1266   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
1267   std::vector<const IPortableTensor *> input_tensors;
1268   for (auto &ifm_idx : node.getInputs())
1269     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
1270
1271   const auto epsilon = node.param().epsilon;
1272   const auto is_training = node.param().is_training;
1273   const auto data_format = node.param().data_format;
1274
1275   auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1276
1277   fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1278
1279   _return_fn = std::move(fn);
1280 }
1281
1282 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1283 {
1284   const auto output_index{node.getOutputs().at(0)};
1285   const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1286
1287   const auto beta = node.param().beta;
1288   const auto axis = node.param().axis;
1289
1290   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1291   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1292
1293   auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1294
1295   fn->configure(input_tensor, beta, axis, output_tensor);
1296
1297   _return_fn = std::move(fn);
1298 }
1299
1300 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1301 {
1302   const auto output_index{node.getOutputs().at(0)};
1303   const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1304   const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1305   const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1306
1307   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1308   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1309   auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
1310   auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
1311
1312   auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1313
1314   fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1315
1316   _return_fn = std::move(fn);
1317 }
1318
1319 void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1320 {
1321   const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1322   const auto output_index{node.getOutputs().at(0)};
1323   auto block_size = node.param().block_size;
1324
1325   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1326   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1327
1328   auto fn = std::make_unique<ops::DepthToSpaceLayer>();
1329
1330   fn->configure(input_tensor, block_size, output_tensor);
1331   _return_fn = std::move(fn);
1332 }
1333
1334 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1335 {
1336   const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1337   const auto output_index{node.getOutputs().at(0)};
1338   auto block_size = node.param().block_size;
1339
1340   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1341   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1342
1343   auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1344
1345   fn->configure(input_tensor, block_size, output_tensor);
1346   _return_fn = std::move(fn);
1347 }
1348
1349 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1350 {
1351   const auto output_index{node.getOutputs().at(0)};
1352   const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1353   const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1354
1355   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1356   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1357   auto seed_tensor = _tensor_reg->getPortableTensor(seed_index);
1358
1359   auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1360
1361   fn->configure(shape_tensor, seed_tensor, output_tensor);
1362   _return_fn = std::move(fn);
1363 }
1364
1365 void KernelGenerator::visit(const ir::operation::SplitV &node)
1366 {
1367   const auto num_splits = node.param().num_splits;
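  // Each split produces exactly one output tensor, so the declared split count must match
  // the number of node outputs.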
1368   assert(num_splits == static_cast<int>(node.getOutputs().size()));
1369
1370   const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1371   const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1372   const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1373
1374   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
1375   auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
1376   auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
1377
1378   std::vector<IPortableTensor *> out_tensors;
1379   for (const auto &output_idx : node.getOutputs())
1380     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
1381
1382   auto fn = std::make_unique<ops::SplitVLayer>();
1383
1384   fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1385
1386   _return_fn = std::move(fn);
1387 }
1388
1389 void KernelGenerator::visit(const ir::operation::LSTM &node)
1390 {
1391   const auto scratch_buffer_index{
1392     node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
1393   const auto output_state_out_index{
1394     node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
1395   const auto cell_state_out_index{
1396     node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
1397   const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
1398
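  // Collect the indices of every LSTM input. Entries marked 'optional' below may refer to
  // tensors that are absent or empty, depending on the LSTM variant (CIFG, peephole, projection).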
1399   const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
1400   const auto input_to_input_weights_index{
1401     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
1402   const auto input_to_forget_weights_index{
1403     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
1404   const auto input_to_cell_weights_index{
1405     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
1406   const auto input_to_output_weights_index{
1407     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
1408   const auto recurrent_to_input_weights_index{
1409     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
1410   const auto recurrent_to_forget_weights_index{
1411     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
1412   const auto recurrent_to_cell_weights_index{
1413     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
1414   const auto recurrent_to_output_weights_index{
1415     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
1416   const auto cell_to_input_weights_index{
1417     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
1418   const auto cell_to_forget_weights_index{
1419     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
1420   const auto cell_to_output_weights_index{
1421     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
1422   const auto input_gate_bias_index{
1423     node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
1424   const auto forget_gate_bias_index{
1425     node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
1426   const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
1427   const auto output_gate_bias_index{
1428     node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
1429   const auto projection_weights_index{
1430     node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
1431   const auto projection_bias_index{
1432     node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
1433   const auto output_state_in_index{
1434     node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
1435   const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
1436   const auto time_major = node.param().time_major;
1437
1438   // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1439   // has_input_to_input_weights && has_recurrent_to_input_weights: non-CIFG
1440   // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1441   // NOTE The cell_to_input_weights does not exist without peephole connections, even in a regular (non-CIFG) LSTM.
1442   bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1443                                     (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1444                                      _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1445   bool has_recurrent_to_input_weights =
1446     _ctx.exist(recurrent_to_input_weights_index) &&
1447     (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1448      _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1449
1450   // NOTE The cell_to_forget_weights and the cell_to_output_weights exist with peephole connections.
1451   // But the cell_to_input_weights does not exist in a CIFG LSTM even with peephole connections.
1452   // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1453   // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1454   bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1455                                     _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1456   bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1457                                     _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1458
1459   bool has_input_gate_bias =
1460     _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
1461
1462   bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1463                                 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1464                                  _ctx.at(projection_weights_index).shape().dim(1) != 0);
1465   bool has_projection_bias =
1466     _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
1467
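  // Outputs the model does not provide are passed to the kernel as nullptr.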
1468   auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1469                                  ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1470                                  : nullptr; // optional
1471   auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1472                                    ? _tensor_reg->getPortableTensor(output_state_out_index)
1473                                    : nullptr; // optional
1474   auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1475                                  ? _tensor_reg->getPortableTensor(cell_state_out_index)
1476                                  : nullptr; // optional
1477   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1478
1479   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1480
1481   auto input_to_input_weights_tensor =
1482     has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1483                                : nullptr; // optional
1484   auto input_to_forget_weights_tensor =
1485     _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1486   auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1487   auto input_to_output_weights_tensor =
1488     _tensor_reg->getPortableTensor(input_to_output_weights_index);
1489   auto recurrent_to_input_weights_tensor =
1490     has_recurrent_to_input_weights
1491       ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1492       : nullptr; // optional
1493   auto recurrent_to_forget_weights_tensor =
1494     _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1495   auto recurrent_to_cell_weights_tensor =
1496     _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1497   auto recurrent_to_output_weights_tensor =
1498     _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1499
1500   auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1501   auto cell_to_forget_weights_tensor =
1502     has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1503                                : nullptr; // optional
1504   auto cell_to_output_weights_tensor =
1505     has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1506                                : nullptr; // optional
1507
1508   auto input_gate_bias_tensor =
1509     has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1510   auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1511   auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1512   auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1513   auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1514   auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1515
1516   auto projection_weights_tensor = has_projection_weights
1517                                      ? _tensor_reg->getPortableTensor(projection_weights_index)
1518                                      : nullptr; // optional
1519   auto projection_bias_tensor = has_projection_bias
1520                                   ? _tensor_reg->getPortableTensor(projection_bias_index)
1521                                   : nullptr; // optional
1522
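  // Layer normalization weights are optional; they stay nullptr unless the node carries the
  // four extra layer-normalization inputs (24 inputs in total).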
1523   IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1524   IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1525   IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1526   IPortableTensor *output_layer_norm_weights_tensor = nullptr;
1527   if (node.getInputs().size() == 24)
1528   {
1529     const auto input_layer_norm_weights_index{
1530       node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1531     const auto forget_layer_norm_weights_index{
1532       node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1533     const auto cell_layer_norm_weights_index{
1534       node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1535     const auto output_layer_norm_weights_index{
1536       node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1537
1538     input_layer_norm_weights_tensor =
1539       _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1540     forget_layer_norm_weights_tensor =
1541       _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1542     cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1543     output_layer_norm_weights_tensor =
1544       _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1545   }
1546
1547   auto fn = std::make_unique<ops::LSTMLayer>();
1548
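  // The auxiliary inputs are not wired up by this generator, so nullptr is passed for them.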
1549   fn->configure(
1550     input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1551     input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
1552     recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
1553     recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
1554     cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
1555     forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
1556     output_layer_norm_weights_tensor,
1557     /*aux_input=*/nullptr,
1558     /*aux_input_to_input_weights=*/nullptr,
1559     /*aux_input_to_forget_weights=*/nullptr,
1560     /*aux_input_to_cell_weights=*/nullptr,
1561     /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1562     cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1563     projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1564     /*forward_sequence=*/true, time_major,
1565     /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1566     output_tensor,
1567     !_ctx.at(output_state_in_index).info().isVariable() /* a non-variable state input currently means an empty buffer on the frontend */,
1568     !_ctx.at(cell_state_in_index).info().isVariable());
1569
1570   _return_fn = std::move(fn);
1571 }
1572
1573 } // namespace cpu
1574 } // namespace backend
1575 } // namespace onert