Imported Upstream version 1.25.0
[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / KernelGenerator.cc
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "KernelGenerator.h"
18
19 #include "ops/AddNLayer.h"
20 #include "ops/ArgMinMaxLayer.h"
21 #include "ops/BatchToSpaceNDLayer.h"
22 #include "ops/BinaryArithmeticLayer.h"
23 #include "ops/CompareLayer.h"
24 #include "ops/ConcatLayer.h"
25 #include "ops/ConvolutionLayer.h"
26 #include "ops/DepthToSpaceLayer.h"
27 #include "ops/DepthwiseConvolutionLayer.h"
28 #include "ops/EinsumLayer.h"
29 #include "ops/ElementwiseActivationLayer.h"
30 #include "ops/ElementwiseBinaryLayer.h"
31 #include "ops/ElementwiseUnaryLayer.h"
32 #include "ops/ExpandDimsLayer.h"
33 #include "ops/FillLayer.h"
34 #include "ops/FullyConnectedLayer.h"
35 #include "ops/GatherLayer.h"
36 #include "ops/LSTMLayer.h"
37 #include "ops/MeanLayer.h"
38 #include "ops/DetectionPostProcessLayer.h"
39 #include "ops/OneHotLayer.h"
40 #include "ops/OperationUtils.h"
41 #include "ops/PackLayer.h"
42 #include "ops/PadLayer.h"
43 #include "ops/PoolLayer.h"
44 #include "ops/PowLayer.h"
45 #include "ops/QuantizeLayer.h"
46 #include "ops/RangeLayer.h"
47 #include "ops/RankLayer.h"
48 #include "ops/ReduceLayer.h"
49 #include "ops/ReshapeLayer.h"
50 #include "ops/ResizeBilinearLayer.h"
51 #include "ops/ReverseLayer.h"
52 #include "ops/SelectLayer.h"
53 #include "ops/ShapeLayer.h"
54 #include "ops/SliceLayer.h"
55 #include "ops/SoftMaxLayer.h"
56 #include "ops/StridedSliceLayer.h"
57 #include "ops/SpaceToBatchNDLayer.h"
58 #include "ops/SpaceToDepthLayer.h"
59 #include "ops/SplitLayer.h"
60 #include "ops/SplitVLayer.h"
61 #include "ops/TileLayer.h"
62 #include "ops/TransposeLayer.h"
63 #include "ops/UnpackLayer.h"
64 #include "ops/SquaredDiffLayer.h"
65 #include "ops/L2NormLayer.h"
66 #include "ops/MatrixBandPartLayer.h"
67 #include "ops/BatchMatMulLayer.h"
68 #include "ops/BroadcastToLayer.h"
69 #include "ops/FusedBatchNormLayer.h"
70 #include "ops/LogSoftMaxLayer.h"
71 #include "ops/StatelessRandomUniformLayer.h"
72
73 #include <backend/Backend.h>
74 #include <backend/IConfig.h>
75 #include <memory>
76 #include <util/Utils.h>
77 #include <util/logging.h>
78 #include <exec/DynamicShapeInferer.h>
79
80 #include <stdexcept>
81
82 namespace onert
83 {
84 namespace backend
85 {
86 namespace cpu
87 {
88
89 namespace
90 {
91 ops::ArithmeticType
92 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
93 {
94   switch (arithmetic_type_ir)
95   {
96     case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
97       return ops::ArithmeticType::kAdd;
98     case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
99       return ops::ArithmeticType::kSub;
100     case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
101       return ops::ArithmeticType::kMul;
102     case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
103       return ops::ArithmeticType::kDiv;
104     default:
105       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
106   }
107 }
108
109 ops::ElementwiseActivationType
110 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
111 {
112   switch (type_ir)
113   {
114     case ir::operation::ElementwiseActivation::Type::ELU:
115       return ops::ElementwiseActivationType::kElu;
116     case ir::operation::ElementwiseActivation::Type::LOGISTIC:
117       return ops::ElementwiseActivationType::kLogistic;
118     case ir::operation::ElementwiseActivation::Type::RELU:
119       return ops::ElementwiseActivationType::kReLU;
120     case ir::operation::ElementwiseActivation::Type::TANH:
121       return ops::ElementwiseActivationType::kTanh;
122     case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
123       return ops::ElementwiseActivationType::kLeakyReLU;
124     default:
125       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
126   }
127 }
128
129 ops::ElementwiseBinaryType
130 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
131 {
132   switch (type_ir)
133   {
134     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
135       return ops::ElementwiseBinaryType::kFloorDiv;
136     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
137       return ops::ElementwiseBinaryType::kLogicalAnd;
138     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
139       return ops::ElementwiseBinaryType::kLogicalOr;
140     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
141       return ops::ElementwiseBinaryType::kMax;
142     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
143       return ops::ElementwiseBinaryType::kMin;
144     default:
145       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
146   }
147 }
148
149 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
150 {
151   switch (type_ir)
152   {
153     case ir::operation::ElementwiseUnary::Type::ABS:
154       return ops::ElementwiseUnaryType::kAbs;
155     case ir::operation::ElementwiseUnary::Type::CAST:
156       return ops::ElementwiseUnaryType::kCast;
157     case ir::operation::ElementwiseUnary::Type::COS:
158       return ops::ElementwiseUnaryType::kCos;
159     case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
160       return ops::ElementwiseUnaryType::kDequantize;
161     case ir::operation::ElementwiseUnary::Type::ERF:
162       return ops::ElementwiseUnaryType::kErf;
163     case ir::operation::ElementwiseUnary::Type::EXP:
164       return ops::ElementwiseUnaryType::kExp;
165     case ir::operation::ElementwiseUnary::Type::FLOOR:
166       return ops::ElementwiseUnaryType::kFloor;
167     case ir::operation::ElementwiseUnary::Type::LOG:
168       return ops::ElementwiseUnaryType::kLog;
169     case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
170       return ops::ElementwiseUnaryType::kLogicalNot;
171     case ir::operation::ElementwiseUnary::Type::NEG:
172       return ops::ElementwiseUnaryType::kNeg;
173     case ir::operation::ElementwiseUnary::Type::QUANTIZE:
174       return ops::ElementwiseUnaryType::kQuantize;
175     case ir::operation::ElementwiseUnary::Type::ROUND:
176       return ops::ElementwiseUnaryType::kRound;
177     case ir::operation::ElementwiseUnary::Type::RSQRT:
178       return ops::ElementwiseUnaryType::kRSqrt;
179     case ir::operation::ElementwiseUnary::Type::SIN:
180       return ops::ElementwiseUnaryType::kSin;
181     case ir::operation::ElementwiseUnary::Type::SQRT:
182       return ops::ElementwiseUnaryType::kSqrt;
183     case ir::operation::ElementwiseUnary::Type::SQUARE:
184       return ops::ElementwiseUnaryType::kSquare;
185     case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
186       return ops::ElementwiseUnaryType::kZerosLike;
187     default:
188       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
189   }
190 }
191
192 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
193 {
194   switch (type_ir)
195   {
196     case ir::operation::Pool2D::PoolType::AVG:
197       return ops::PoolType::kAvg;
198     case ir::operation::Pool2D::PoolType::MAX:
199       return ops::PoolType::kMax;
200     default:
201       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
202   }
203 }
204
205 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
206 {
207   switch (reduce_type_ir)
208   {
209     case ir::operation::Reduce::ReduceType::ALL:
210       return ops::ReduceType::kAll;
211     case ir::operation::Reduce::ReduceType::ANY:
212       return ops::ReduceType::kAny;
213     case ir::operation::Reduce::ReduceType::MAX:
214       return ops::ReduceType::kMax;
215     case ir::operation::Reduce::ReduceType::MIN:
216       return ops::ReduceType::kMin;
217     case ir::operation::Reduce::ReduceType::PROD:
218       return ops::ReduceType::kProd;
219     case ir::operation::Reduce::ReduceType::SUM:
220       return ops::ReduceType::kSum;
221     default:
222       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
223   }
224 }
225 } // namespace
226
227 KernelGenerator::KernelGenerator(
228   const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
229   const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
230   const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
231   const std::shared_ptr<ExternalContext> &external_context)
232   : basic::KernelGeneratorBase{graph},
233     _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
234     _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
235     _external_context(external_context)
236 {
237   // DO NOTHING
238 }
239
240 std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
241 {
242   auto ret = std::make_unique<exec::FunctionSequence>();
243
244   assert(_tensor_builder->dynamicTensorManager());
245   assert(_tensor_reg);
246
247   // Prepare to handle dynamic tensors later
248   auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
249   {
250     dyn_ctx->op = &_operations_ctx.at(ind);
251     dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
252   }
253   ret->dynamic_tensor_ctx(dyn_ctx);
254
255   auto &op = _graph.operations().at(ind);
256   op.accept(*this);
257   assert(_return_fn); // _return_fn must have been generated
258   ret->append(std::move(_return_fn));
259
260   for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
261   {
262     auto portable_tensor = _tensor_reg->getPortableTensor(ind);
263     if (portable_tensor)
264     {
265       assert(portable_tensor->layout() == ir::Layout::NHWC);
266     }
267
268     auto tensor = _tensor_reg->getNativeTensor(ind);
269     if (tensor)
270     {
271       tensor->increase_ref();
272     }
273   }
274   return ret;
275 }
276
277 void KernelGenerator::visit(const ir::operation::AddN &node)
278 {
279   const auto output_index{node.getOutputs().at(0)};
280
281   std::vector<const IPortableTensor *> input_tensors;
282   for (const auto &input_idx : node.getInputs())
283     input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
284
285   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
286
287   auto fn = std::make_unique<ops::AddNLayer>();
288
289   fn->configure(std::move(input_tensors), output_tensor);
290
291   _return_fn = std::move(fn);
292 }
293
294 void KernelGenerator::visit(const ir::operation::Conv2D &node)
295 {
296   using ir::operation::Conv2D;
297
298   const auto ofm_index{node.getOutputs().at(0)};
299   const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
300   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
301   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
302
303   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
304   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
305   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
306   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
307
308   const auto stride = node.param().stride;
309   const auto activation = node.param().activation;
310   const auto param_padding = node.param().padding;
311   const auto dilation = node.param().dilation;
312   auto fn = std::make_unique<ops::ConvolutionLayer>();
313
314   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
315   {
316     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
317                   param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
318                   stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
319                   activation, ofm_tensor);
320
321     _return_fn = std::move(fn);
322     return;
323   }
324   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
325   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
326   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
327   const auto &ker_shape = _ctx.at(ker_index).shape();
328   const auto ker_height = ker_shape.dim(1);
329   const auto ker_width = ker_shape.dim(2);
330
331   const auto padding =
332     ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
333                          dilation.width_factor, dilation.height_factor);
334
335   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
336                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
337                 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
338
339   _return_fn = std::move(fn);
340 }
341
342 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
343 {
344   using ir::operation::DepthwiseConv2D;
345
346   const auto ofm_index{node.getOutputs().at(0)};
347   const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
348   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
349   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
350
351   const auto stride = node.param().stride;
352   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
353   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
354   // Kernel format is [1, kernel_height, kernel_width, depth_out].
355   const auto &ker_shape = _ctx.at(ker_index).shape();
356   const auto ker_height = ker_shape.dim(1);
357   const auto ker_width = ker_shape.dim(2);
358   const auto dilation_width = node.param().dilation.width_factor;
359   const auto dilation_height = node.param().dilation.height_factor;
360   const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
361                                             ker_width, ker_height, dilation_width, dilation_height);
362   const auto multiplier = node.param().multiplier;
363   const auto activation = node.param().activation;
364
365   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
366   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
367   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
368   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
369
370   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
371
372   fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
373                 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
374                 dilation_height, activation, ofm_tensor, _external_context);
375
376   _return_fn = std::move(fn);
377 }
378
379 void KernelGenerator::visit(const ir::operation::Concat &node)
380 {
381   const auto ofm_index{node.getOutputs().at(0)};
382
383   const auto rank = _ctx.at(ofm_index).shape().rank();
384   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
385
386   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
387
388   std::vector<const IPortableTensor *> input_tensors;
389   for (const auto &ifm_idx : node.getInputs())
390     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
391
392   auto fn = std::make_unique<ops::ConcatLayer>();
393
394   fn->configure(input_tensors, axis, output_tensor);
395
396   _return_fn = std::move(fn);
397 }
398
399 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
400 {
401   const auto output_index{node.getOutputs().at(0)};
402   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
403   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
404
405   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
406   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
407   auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
408
409   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
410
411   IPortableTensor *crops_alloc = nullptr;
412   const auto NNApiInputs = 2;
413
414   if (node.getInputs().size() != NNApiInputs)
415   {
416     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
417     crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
418   }
419
420   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
421
422   _return_fn = std::move(fn);
423 }
424
425 void KernelGenerator::visit(const ir::operation::Fill &node)
426 {
427   const auto output_index{node.getOutputs().at(0)};
428   // SHAPE input is used for shape inference
429   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
430
431   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
432   auto value_tensor = _tensor_reg->getPortableTensor(value_index);
433
434   auto fn = std::make_unique<ops::FillLayer>();
435
436   fn->configure(value_tensor, output_tensor);
437
438   _return_fn = std::move(fn);
439 }
440
441 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
442 {
443   using ir::operation::FullyConnected;
444
445   const auto output_index{node.getOutputs().at(0)};
446   const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
447   const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
448   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
449   const auto activation = node.param().activation;
450   const auto weights_format = node.param().weights_format;
451
452   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
453   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
454   auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
455   auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
456
457   auto fn = std::make_unique<ops::FullyConnectedLayer>();
458
459   fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
460                 _external_context);
461
462   _return_fn = std::move(fn);
463 }
464
465 void KernelGenerator::visit(const ir::operation::Reshape &node)
466 {
467   const auto output_index{node.getOutputs().at(0)};
468   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
469
470   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
471   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
472
473   // optional 2nd input
474   IPortableTensor *shape_tensor = nullptr;
475
476   if (node.getInputs().size() == 2)
477   {
478     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
479     shape_tensor = _tensor_reg->getPortableTensor(shape_index);
480   }
481
482   auto fn = std::make_unique<ops::ReshapeLayer>();
483
484   fn->configure(input_tensor, shape_tensor, output_tensor);
485   _return_fn = std::move(fn);
486 }
487
488 void KernelGenerator::visit(const ir::operation::Squeeze &node)
489 {
490   const auto output_index{node.getOutputs().at(0)};
491   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
492
493   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
494   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
495
496   // Squeeze can share same kernel with reshape
497   auto fn = std::make_unique<ops::ReshapeLayer>();
498
499   fn->configure(input_tensor, nullptr, output_tensor);
500
501   _return_fn = std::move(fn);
502 }
503
504 void KernelGenerator::visit(const ir::operation::Softmax &node)
505 {
506   const auto output_index{node.getOutputs().at(0)};
507   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
508
509   const auto beta = node.param().beta;
510
511   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
512   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
513
514   auto fn = std::make_unique<ops::SoftMaxLayer>();
515
516   fn->configure(input_tensor, beta, output_tensor);
517
518   _return_fn = std::move(fn);
519 }
520
521 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
522 {
523   const auto ofm_index{node.getOutputs().at(0)};
524   const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
525   const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
526
527   const auto activation = node.param().activation;
528
529   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
530   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
531   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
532
533   auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
534
535   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
536                 convertArithmeticType(node.param().arithmetic_type));
537
538   _return_fn = std::move(fn);
539 }
540
541 void KernelGenerator::visit(const ir::operation::Comparison &node)
542 {
543   const auto ofm_index{node.getOutputs().at(0)};
544   const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
545   const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
546
547   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
548   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
549   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
550
551   auto comparison_type = node.param().comparison_type;
552
553   auto fn = std::make_unique<ops::CompareLayer>();
554
555   fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
556
557   _return_fn = std::move(fn);
558 }
559
560 void KernelGenerator::visit(const ir::operation::Gather &node)
561 {
562   const auto output_index{node.getOutputs().at(0)};
563   const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
564   const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
565
566   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
567   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
568   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
569
570   const auto backend_layout = output_tensor->layout();
571   UNUSED_RELEASE(backend_layout);
572
573   // NOTE The frontend layout and backend layout must be the same for this operation.
574   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
575   //      is not not efficient even if it works well. If so, it would be better to set the
576   //      layout of these backend tensors to the same layout.
577   //      There is also one thing we have to think about. This operation depends on the layout of
578   //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
579   //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
580   //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
581   assert(backend_layout == input_tensor->layout());
582   assert(backend_layout == indices_tensor->layout());
583   const auto &input_shape = _ctx.at(input_index).shape();
584   UNUSED_RELEASE(input_shape);
585   assert(input_shape.rank() < 4 || _current_layout == backend_layout);
586
587   const auto axis_raw = node.param().axis;
588   const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
589
590   auto fn = std::make_unique<ops::GatherLayer>();
591
592   fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
593
594   _return_fn = std::move(fn);
595 }
596
597 void KernelGenerator::visit(const ir::operation::OneHot &node)
598 {
599   const auto output_index{node.getOutputs().at(0)};
600   const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
601   const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
602   const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
603   const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
604
605   const auto axis = node.param().axis;
606
607   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
608   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
609   auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
610   auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
611   auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
612
613   assert(indices_tensor->data_type() == OperandType::INT32);
614   assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));
615
616   auto fn = std::make_unique<ops::OneHotLayer>();
617
618   fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
619
620   _return_fn = std::move(fn);
621 }
622
623 void KernelGenerator::visit(const ir::operation::Einsum &node)
624 {
625   const auto ofm_index{node.getOutputs().at(0)};
626
627   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
628   std::vector<const IPortableTensor *> input_tensors;
629   for (const auto &ifm_idx : node.getInputs())
630     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
631
632   const auto equation = node.param().equation;
633
634   auto fn = std::make_unique<ops::EinsumLayer>();
635
636   fn->configure(input_tensors, equation, output_tensor);
637
638   _return_fn = std::move(fn);
639 }
640
641 void KernelGenerator::visit(const ir::operation::Custom &node)
642 {
643   auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
644                           std::vector<custom::TypeInfo> &types,
645                           std::vector<IPortableTensor *> &tensors) {
646     for (const auto &idx : opSeq)
647     {
648       const auto &operand = _ctx.at(idx);
649       // TODO make sure using `_current_layout` is correct for custom operations
650       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
651       auto in_tensor = _tensor_reg->getPortableTensor(idx);
652       tensors.emplace_back(in_tensor);
653     }
654   };
655
656   backend::custom::CustomKernelConfigParams params{};
657
658   fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
659   fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
660
661   params.userdata = node.userdata().data;
662   params.userdata_size = node.userdata().size;
663
664   auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
665
666   _return_fn = std::move(fn);
667 }
668
669 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
670 {
671   const auto output_index{node.getOutputs().at(0)};
672   const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
673
674   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
675   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
676
677   auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
678
679   fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
680                 convertElementwiseActivationType(node.param().op_type));
681
682   _return_fn = std::move(fn);
683 }
684
685 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
686 {
687   const auto output_index{node.getOutputs().at(0)};
688   const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
689   const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
690
691   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
692   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
693   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
694
695   auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
696
697   fn->configure(lhs_tensor, rhs_tensor, output_tensor,
698                 convertElementwiseBinaryType(node.param().op_type));
699
700   _return_fn = std::move(fn);
701 }
702
703 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
704 {
705   const auto output_index{node.getOutputs().at(0)};
706   const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
707
708   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
709   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
710
711   if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
712   {
713     auto fn = std::make_unique<ops::QuantizeLayer>();
714     fn->configure(input_tensor, output_tensor);
715     _return_fn = std::move(fn);
716   }
717   else
718   {
719     auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
720     fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
721     _return_fn = std::move(fn);
722   }
723 }
724
725 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
726 {
727   const auto output_index{node.getOutputs().at(0)};
728   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
729   // AXIS input is used for output shape inference
730
731   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
732   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
733
734   auto fn = std::make_unique<ops::ExpandDimsLayer>();
735
736   fn->configure(input_tensor, output_tensor);
737
738   _return_fn = std::move(fn);
739 }
740
741 void KernelGenerator::visit(const ir::operation::Pack &node)
742 {
743   const auto ofm_index{node.getOutputs().at(0)};
744
745   const auto rank = _ctx.at(ofm_index).shape().rank();
746   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
747
748   assert(-rank <= axis && axis < rank);
749
750   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
751
752   std::vector<const IPortableTensor *> input_tensors;
753   for (const auto &ifm_idx : node.getInputs())
754     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
755
756   auto fn = std::make_unique<ops::PackLayer>();
757
758   fn->configure(input_tensors, axis, output_tensor);
759
760   _return_fn = std::move(fn);
761 }
762
763 void KernelGenerator::visit(const ir::operation::Unpack &node)
764 {
765   const auto input_index{node.getInputs().at(0)};
766
767   const auto rank = _ctx.at(input_index).shape().rank();
768   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
769
770   assert(rank == 0 || (-rank <= axis && axis < rank));
771
772   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
773
774   std::vector<IPortableTensor *> output_tensors;
775   for (const auto &output_idx : node.getOutputs())
776     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
777
778   auto fn = std::make_unique<ops::UnpackLayer>();
779
780   uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
781
782   fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
783
784   _return_fn = std::move(fn);
785 }
786
787 void KernelGenerator::visit(const ir::operation::Pad &node)
788 {
789   const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
790   const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
791   const auto output_index{node.getOutputs().at(0)};
792   assert(_ctx.at(pad_index).data());
793
794   auto input = _tensor_reg->getPortableTensor(input_index);
795   auto output = _tensor_reg->getPortableTensor(output_index);
796   auto pad_rank = _ctx.at(pad_index).shape().dim(0);
797   auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
798
799   auto fn = std::make_unique<ops::PadLayer>();
800
801   bool isPadV2 = node.getInputs().size() == 3 ? true : false;
802   const void *value = nullptr;
803
804   if (isPadV2)
805   {
806     const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
807     value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
808   }
809
810   fn->configure(input, output, pad_base, pad_rank, value);
811   _return_fn = std::move(fn);
812 }
813
814 void KernelGenerator::visit(const ir::operation::Transpose &node)
815 {
816   const auto output_index{node.getOutputs().at(0)};
817   const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
818   const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
819
820   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
821   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
822   auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
823
824   auto fn = std::make_unique<ops::TransposeLayer>();
825
826   fn->configure(input_tensor, perm_tensor, output_tensor);
827
828   _return_fn = std::move(fn);
829 }
830
831 void KernelGenerator::visit(const ir::operation::Reduce &node)
832 {
833   const auto output_index{node.getOutputs().at(0)};
834   const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
835   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
836
837   const auto keep_dims = node.param().keep_dims;
838   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
839   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
840   auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
841
842   if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
843   {
844     auto fn = std::make_unique<ops::MeanLayer>();
845
846     fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
847
848     _return_fn = std::move(fn);
849   }
850   else
851   {
852     auto fn = std::make_unique<ops::ReduceLayer>();
853
854     const auto reduce_type = convertReduceType(node.param().reduce_type);
855     fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
856
857     _return_fn = std::move(fn);
858   }
859 }
860
861 void KernelGenerator::visit(const ir::operation::Select &node)
862 {
863   const auto output_index{node.getOutputs().at(0)};
864   const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
865   const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
866   const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
867
868   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
869   auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
870   auto true_tensor = _tensor_reg->getPortableTensor(true_index);
871   auto false_tensor = _tensor_reg->getPortableTensor(false_index);
872
873   auto fn = std::make_unique<ops::SelectLayer>();
874
875   fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
876
877   _return_fn = std::move(fn);
878 }
879
880 void KernelGenerator::visit(const ir::operation::Slice &node)
881 {
882   const auto output_index{node.getOutputs().at(0)};
883   const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
884   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
885   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
886
887   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
888   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
889   auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
890   auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
891
892   auto fn = std::make_unique<ops::SliceLayer>();
893
894   fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
895
896   _return_fn = std::move(fn);
897 }
898
899 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
900 {
901   const auto output_index{node.getOutputs().at(0)};
902   const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
903   const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
904   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
905   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
906
907   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
908   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
909   auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
910   auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
911   auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
912
913   auto begin_mask = node.param().begin_mask;
914   auto end_mask = node.param().end_mask;
915   auto shrink_axis_mask = node.param().shrink_axis_mask;
916
917   auto fn = std::make_unique<ops::StridedSliceLayer>();
918
919   fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
920                 end_mask, shrink_axis_mask);
921
922   _return_fn = std::move(fn);
923 }
924
925 void KernelGenerator::visit(const ir::operation::Split &node)
926 {
927   const auto num_splits = node.param().num_splits;
928   assert(num_splits == static_cast<int>(node.getOutputs().size()));
929
930   const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
931   const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
932
933   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
934   auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
935
936   std::vector<IPortableTensor *> out_tensors;
937   for (const auto &output_idx : node.getOutputs())
938     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
939
940   auto fn = std::make_unique<ops::SplitLayer>();
941
942   fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
943
944   _return_fn = std::move(fn);
945 }
946
947 void KernelGenerator::visit(const ir::operation::Shape &node)
948 {
949   const auto ofm_index{node.getOutputs().at(0)};
950   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
951
952   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
953   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
954
955   auto fn = std::make_unique<ops::ShapeLayer>();
956
957   fn->configure(ifm_tensor, ofm_tensor);
958
959   _return_fn = std::move(fn);
960 }
961
962 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
963 {
964   const auto output_index{node.getOutputs().at(0)};
965   const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
966
967   auto align_corners = node.param().align_corners;
968   auto half_pixel_centers = node.param().half_pixel_centers;
969
970   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
971   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
972
973   auto fn = std::make_unique<ops::ResizeBilinearLayer>();
974
975   if (node.getInputs().size() == 1)
976   {
977     fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
978                   align_corners, half_pixel_centers);
979   }
980   else
981   {
982     assert(node.getInputs().size() == 2);
983     const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
984     auto size_tensor = _tensor_reg->getPortableTensor(size_index);
985     if (size_tensor->is_constant())
986     {
987       auto size_vec = _ctx.at(size_index).asVector<int32_t>();
988       const auto height_out = size_vec[0];
989       const auto width_out = size_vec[1];
990       fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
991                     half_pixel_centers);
992     }
993     else
994     {
995       fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
996     }
997   }
998
999   _return_fn = std::move(fn);
1000 }
1001
1002 void KernelGenerator::visit(const ir::operation::Reverse &node)
1003 {
1004   const auto output_index{node.getOutputs().at(0)};
1005   const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
1006   const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
1007
1008   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1009   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1010   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1011
1012   auto fn = std::make_unique<ops::ReverseLayer>();
1013
1014   fn->configure(input_tensor, axis_tensor, output_tensor);
1015
1016   _return_fn = std::move(fn);
1017 }
1018
1019 void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
1020 {
1021   const auto output_index{node.getOutputs().at(0)};
1022   const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
1023   const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
1024
1025   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1026   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1027   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1028
1029   auto fn = std::make_unique<ops::ArgMinMaxLayer>();
1030
1031   fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
1032
1033   _return_fn = std::move(fn);
1034 }
1035
1036 void KernelGenerator::visit(const ir::operation::Pool2D &node)
1037 {
1038   const auto ofm_index{node.getOutputs().at(0)};
1039   const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
1040
1041   const auto kh = node.param().kh;
1042   const auto kw = node.param().kw;
1043   const auto stride = node.param().stride;
1044   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
1045   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
1046   const auto padding =
1047     ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
1048   const auto activation = node.param().activation;
1049
1050   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1051   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1052
1053   auto fn = std::make_unique<ops::PoolLayer>();
1054
1055   fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1056                 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1057                 convertPoolType(node.param().op_type));
1058
1059   _return_fn = std::move(fn);
1060 }
1061
1062 void KernelGenerator::visit(const ir::operation::Pow &node)
1063 {
1064   const auto output_index{node.getOutputs().at(0)};
1065   const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1066   const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1067
1068   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1069   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1070   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1071
1072   auto fn = std::make_unique<ops::PowLayer>();
1073
1074   fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1075
1076   _return_fn = std::move(fn);
1077 }
1078
1079 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1080 {
1081   const auto output_index{node.getOutputs().at(0)};
1082   const auto input_index{node.getInputs().at(0)};
1083
1084   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1085   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
1086
1087   auto fn = std::make_unique<ops::L2NormLayer>();
1088
1089   fn->configure(input_alloc, output_alloc);
1090
1091   _return_fn = std::move(fn);
1092 }
1093
1094 void KernelGenerator::visit(const ir::operation::Range &node)
1095 {
1096   const auto output_index{node.getOutputs().at(0)};
1097   const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1098   const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1099   const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1100
1101   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1102   auto start_tensor = _tensor_reg->getPortableTensor(start_index);
1103   auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
1104   auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
1105
1106   auto fn = std::make_unique<ops::RangeLayer>();
1107
1108   fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1109   _return_fn = std::move(fn);
1110 }
1111
1112 void KernelGenerator::visit(const ir::operation::Rank &node)
1113 {
1114   const auto ofm_index{node.getOutputs().at(0)};
1115   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1116
1117   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1118   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1119
1120   auto fn = std::make_unique<ops::RankLayer>();
1121
1122   fn->configure(ifm_tensor, ofm_tensor);
1123
1124   _return_fn = std::move(fn);
1125 }
1126
1127 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1128 {
1129   const auto ofm_index{node.getOutputs().at(0)};
1130   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1131   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1132
1133   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1134   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1135   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1136
1137   auto fn = std::make_unique<ops::SqDiffLayer>();
1138
1139   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1140   _return_fn = std::move(fn);
1141 }
1142
1143 void KernelGenerator::visit(const ir::operation::Tile &node)
1144 {
1145   const auto output_index{node.getOutputs().at(0)};
1146   const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1147   const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1148
1149   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1150   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1151   auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
1152
1153   auto fn = std::make_unique<ops::TileLayer>();
1154
1155   fn->configure(input_tensor, multiples_tensor, output_tensor);
1156   _return_fn = std::move(fn);
1157 }
1158
1159 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1160 {
1161   const auto output_index{node.getOutputs().at(0)};
1162   const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1163   const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1164   const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1165
1166   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1167   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1168   auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
1169   auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
1170
1171   auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1172
1173   fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1174   _return_fn = std::move(fn);
1175 }
1176
1177 void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
1178 {
1179   using NMS = ir::operation::DetectionPostProcess;
1180
1181   ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
1182   parameters.scales.y = node.param().scale.y_scale;
1183   parameters.scales.x = node.param().scale.x_scale;
1184   parameters.scales.w = node.param().scale.w_scale;
1185   parameters.scales.h = node.param().scale.h_scale;
1186
1187   parameters.iou_threshold = node.param().iou_threshold;
1188   parameters.score_threshold = node.param().score_threshold;
1189   parameters.max_boxes_per_class = node.param().max_boxes_per_class;
1190   parameters.max_detections = node.param().max_detections;
1191   parameters.num_classes = node.param().num_classes;
1192   parameters.center_box_format = node.param().center_size_boxes;
1193   parameters.max_classes_per_detection = node.param().max_classes_per_detection;
1194
1195   auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
1196   auto scores_index = node.getInputs().at(NMS::Input::SCORES);
1197   auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);
1198
1199   auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
1200   auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
1201   auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
1202   auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);
1203
1204   parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
1205   parameters.scrores_descr = _ctx.at(scores_index).shape().dims();
1206
1207   parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
1208   parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
1209   parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);
1210
1211   parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
1212   parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
1213   parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
1214   parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);
1215
1216   auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
1217   fn->configure(std::move(parameters));
1218
1219   _return_fn = std::move(fn);
1220 }
1221
1222 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1223 {
1224   const auto output_index{node.getOutputs().at(0)};
1225   const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1226   const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1227
1228   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1229   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1230   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1231
1232   const auto adj_x = node.param().adj_x;
1233   const auto adj_y = node.param().adj_y;
1234
1235   auto fn = std::make_unique<ops::BatchMatMulLayer>();
1236
1237   fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1238   _return_fn = std::move(fn);
1239 }
1240
1241 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1242 {
1243   const auto output_index{node.getOutputs().at(0)};
1244   const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1245   const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1246
1247   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1248   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1249   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1250
1251   auto fn = std::make_unique<ops::BroadcastToLayer>();
1252
1253   fn->configure(input_tensor, shape_tensor, output_tensor);
1254
1255   _return_fn = std::move(fn);
1256 }
1257
1258 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1259 {
1260   const auto ofm_index{node.getOutputs().at(0)};
1261
1262   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
1263   std::vector<const IPortableTensor *> input_tensors;
1264   for (const auto &ifm_idx : node.getInputs())
1265     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
1266
1267   const auto epsilon = node.param().epsilon;
1268   const auto is_training = node.param().is_training;
1269   const auto data_format = node.param().data_format;
1270
1271   auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1272
1273   fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1274
1275   _return_fn = std::move(fn);
1276 }
1277
1278 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1279 {
1280   const auto output_index{node.getOutputs().at(0)};
1281   const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1282
1283   const auto beta = node.param().beta;
1284   const auto axis = node.param().axis;
1285
1286   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1287   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1288
1289   auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1290
1291   fn->configure(input_tensor, beta, axis, output_tensor);
1292
1293   _return_fn = std::move(fn);
1294 }
1295
1296 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1297 {
1298   const auto output_index{node.getOutputs().at(0)};
1299   const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1300   const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1301   const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1302
1303   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1304   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1305   auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
1306   auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
1307
1308   auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1309
1310   fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1311
1312   _return_fn = std::move(fn);
1313 }
1314
1315 void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1316 {
1317   const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1318   const auto output_index{node.getOutputs().at(0)};
1319   auto block_size = node.param().block_size;
1320
1321   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1322   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1323
1324   auto fn = std::make_unique<ops::DepthToSpaceLayer>();
1325
1326   fn->configure(input_tensor, block_size, output_tensor);
1327   _return_fn = std::move(fn);
1328 }
1329
1330 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1331 {
1332   const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1333   const auto output_index{node.getOutputs().at(0)};
1334   auto block_size = node.param().block_size;
1335
1336   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1337   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1338
1339   auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1340
1341   fn->configure(input_tensor, block_size, output_tensor);
1342   _return_fn = std::move(fn);
1343 }
1344
1345 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1346 {
1347   const auto output_index{node.getOutputs().at(0)};
1348   const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1349   const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1350
1351   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1352   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1353   auto seed_tensor = _tensor_reg->getPortableTensor(seed_index);
1354
1355   auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1356
1357   fn->configure(shape_tensor, seed_tensor, output_tensor);
1358   _return_fn = std::move(fn);
1359 }
1360
1361 void KernelGenerator::visit(const ir::operation::SplitV &node)
1362 {
1363   const auto num_splits = node.param().num_splits;
1364   assert(num_splits == static_cast<int>(node.getOutputs().size()));
1365
1366   const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1367   const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1368   const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1369
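  // size_splits and split_dim are graph inputs rather than node parameters, so they are looked up
  // as tensors here.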
1370   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
1371   auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
1372   auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
1373
1374   std::vector<IPortableTensor *> out_tensors;
1375   for (const auto &output_idx : node.getOutputs())
1376     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
1377
1378   auto fn = std::make_unique<ops::SplitVLayer>();
1379
1380   fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1381
1382   _return_fn = std::move(fn);
1383 }
1384
1385 void KernelGenerator::visit(const ir::operation::LSTM &node)
1386 {
1387   const auto scratch_buffer_index{
1388     node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
1389   const auto output_state_out_index{
1390     node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
1391   const auto cell_state_out_index{
1392     node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
1393   const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
1394
1395   const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
1396   const auto input_to_input_weights_index{
1397     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
1398   const auto input_to_forget_weights_index{
1399     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
1400   const auto input_to_cell_weights_index{
1401     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
1402   const auto input_to_output_weights_index{
1403     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
1404   const auto recurrent_to_input_weights_index{
1405     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
1406   const auto recurrent_to_forget_weights_index{
1407     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
1408   const auto recurrent_to_cell_weights_index{
1409     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
1410   const auto recurrent_to_output_weights_index{
1411     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
1412   const auto cell_to_input_weights_index{
1413     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
1414   const auto cell_to_forget_weights_index{
1415     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
1416   const auto cell_to_output_weights_index{
1417     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
1418   const auto input_gate_bias_index{
1419     node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
1420   const auto forget_gate_bias_index{
1421     node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
1422   const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
1423   const auto output_gate_bias_index{
1424     node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
1425   const auto projection_weights_index{
1426     node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
1427   const auto projection_bias_index{
1428     node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
1429   const auto output_state_in_index{
1430     node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
1431   const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
1432   const auto time_major = node.param().time_major;
1433
1434   // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1435   // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
1436   // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1437   // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
1438   bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1439                                     (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1440                                      _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1441   bool has_recurrent_to_input_weights =
1442     _ctx.exist(recurrent_to_input_weights_index) &&
1443     (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1444      _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1445
1446   // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole mode.
1447   // But the cell_to_input_weights does not exist in CIFG mode, even with peephole connections.
1448   // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1449   // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1450   bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1451                                     _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1452   bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1453                                     _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1454
1455   bool has_input_gate_bias =
1456     _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
1457
1458   bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1459                                 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1460                                  _ctx.at(projection_weights_index).shape().dim(1) != 0);
1461   bool has_projection_bias =
1462     _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
1463
1464   auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1465                                  ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1466                                  : nullptr; // optional
1467   auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1468                                    ? _tensor_reg->getPortableTensor(output_state_out_index)
1469                                    : nullptr; // optional
1470   auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1471                                  ? _tensor_reg->getPortableTensor(cell_state_out_index)
1472                                  : nullptr; // optional
1473   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1474
1475   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1476
1477   auto input_to_input_weights_tensor =
1478     has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1479                                : nullptr; // optional
1480   auto input_to_forget_weights_tensor =
1481     _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1482   auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1483   auto input_to_output_weights_tensor =
1484     _tensor_reg->getPortableTensor(input_to_output_weights_index);
1485   auto recurrent_to_input_weights_tensor =
1486     has_recurrent_to_input_weights
1487       ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1488       : nullptr; // optional
1489   auto recurrent_to_forget_weights_tensor =
1490     _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1491   auto recurrent_to_cell_weights_tensor =
1492     _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1493   auto recurrent_to_output_weights_tensor =
1494     _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1495
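  // cell_to_input_weights is only meaningful for a peephole, non-CIFG LSTM (see NOTEs above).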
1496   auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1497   auto cell_to_forget_weights_tensor =
1498     has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1499                                : nullptr; // optional
1500   auto cell_to_output_weights_tensor =
1501     has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1502                                : nullptr; // optional
1503
1504   auto input_gate_bias_tensor =
1505     has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1506   auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1507   auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1508   auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1509   auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1510   auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1511
1512   auto projection_weights_tensor = has_projection_weights
1513                                      ? _tensor_reg->getPortableTensor(projection_weights_index)
1514                                      : nullptr; // optional
1515   auto projection_bias_tensor = has_projection_bias
1516                                   ? _tensor_reg->getPortableTensor(projection_bias_index)
1517                                   : nullptr; // optional
1518
1519   IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1520   IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1521   IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1522   IPortableTensor *output_layer_norm_weights_tensor = nullptr;
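  // An LSTM node with 24 inputs also carries the four optional layer-normalization weight tensors.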
1523   if (node.getInputs().size() == 24)
1524   {
1525     const auto input_layer_norm_weights_index{
1526       node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1527     const auto forget_layer_norm_weights_index{
1528       node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1529     const auto cell_layer_norm_weights_index{
1530       node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1531     const auto output_layer_norm_weights_index{
1532       node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1533
1534     input_layer_norm_weights_tensor =
1535       _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1536     forget_layer_norm_weights_tensor =
1537       _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1538     cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1539     output_layer_norm_weights_tensor =
1540       _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1541   }
1542
1543   auto fn = std::make_unique<ops::LSTMLayer>();
1544
1545   fn->configure(
1546     input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1547     input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
1548     recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
1549     recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
1550     cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
1551     forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
1552     output_layer_norm_weights_tensor,
1553     /*aux_input=*/nullptr,
1554     /*aux_input_to_input_weights=*/nullptr,
1555     /*aux_input_to_forget_weights=*/nullptr,
1556     /*aux_input_to_cell_weights=*/nullptr,
1557     /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1558     cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1559     projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1560     /*forward_sequence=*/true, time_major,
1561     /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1562     output_tensor,
1563     !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
1564     !_ctx.at(cell_state_in_index).info().isVariable());
1565
1566   _return_fn = std::move(fn);
1567 }
1568
1569 } // namespace cpu
1570 } // namespace backend
1571 } // namespace onert