1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "KernelGenerator.h"
18
19 #include "ops/AddNLayer.h"
20 #include "ops/ArgMinMaxLayer.h"
21 #include "ops/BatchToSpaceNDLayer.h"
22 #include "ops/BinaryArithmeticLayer.h"
23 #include "ops/CompareLayer.h"
24 #include "ops/ConcatLayer.h"
25 #include "ops/ConvolutionLayer.h"
26 #include "ops/DepthwiseConvolutionLayer.h"
27 #include "ops/EinsumLayer.h"
28 #include "ops/ElementwiseActivationLayer.h"
29 #include "ops/ElementwiseBinaryLayer.h"
30 #include "ops/ElementwiseUnaryLayer.h"
31 #include "ops/ExpandDimsLayer.h"
32 #include "ops/FillLayer.h"
33 #include "ops/FullyConnectedLayer.h"
34 #include "ops/GatherLayer.h"
35 #include "ops/LSTMLayer.h"
36 #include "ops/MeanLayer.h"
37 #include "ops/OneHotLayer.h"
38 #include "ops/OperationUtils.h"
39 #include "ops/PackLayer.h"
40 #include "ops/PadLayer.h"
41 #include "ops/PoolLayer.h"
42 #include "ops/PowLayer.h"
43 #include "ops/RangeLayer.h"
44 #include "ops/RankLayer.h"
45 #include "ops/ReduceLayer.h"
46 #include "ops/ReshapeLayer.h"
47 #include "ops/ResizeBilinearLayer.h"
48 #include "ops/ReverseLayer.h"
49 #include "ops/SelectLayer.h"
50 #include "ops/ShapeLayer.h"
51 #include "ops/SliceLayer.h"
52 #include "ops/SoftMaxLayer.h"
53 #include "ops/StridedSliceLayer.h"
54 #include "ops/SpaceToBatchNDLayer.h"
55 #include "ops/SpaceToDepthLayer.h"
56 #include "ops/SplitLayer.h"
57 #include "ops/SplitVLayer.h"
58 #include "ops/TileLayer.h"
59 #include "ops/TransposeLayer.h"
60 #include "ops/UnpackLayer.h"
61 #include "ops/SquaredDiffLayer.h"
62 #include "ops/L2NormLayer.h"
63 #include "ops/MatrixBandPartLayer.h"
64 #include "ops/BatchMatMulLayer.h"
65 #include "ops/BroadcastToLayer.h"
66 #include "ops/FusedBatchNormLayer.h"
67 #include "ops/LogSoftMaxLayer.h"
68 #include "ops/StatelessRandomUniformLayer.h"
69
70 #include <backend/Backend.h>
71 #include <backend/IConfig.h>
72 #include <memory>
73 #include <util/Utils.h>
74 #include <util/logging.h>
75 #include <exec/DynamicShapeInferer.h>
76
77 #include <stdexcept>
78
79 namespace onert
80 {
81 namespace backend
82 {
83 namespace cpu
84 {
85
86 namespace
87 {
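// The helpers in this anonymous namespace translate frontend IR enum values into the
// CPU backend's kernel-level enums, e.g. convertArithmeticType() maps
// ir::operation::BinaryArithmetic::ArithmeticType::ADD to ops::ArithmeticType::kAdd.
// Any IR value that has no CPU kernel counterpart throws std::runtime_error, so an
// unsupported operation is rejected at kernel-generation time rather than at run time.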
88 ops::ArithmeticType
89 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
90 {
91   switch (arithmetic_type_ir)
92   {
93     case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
94       return ops::ArithmeticType::kAdd;
95     case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
96       return ops::ArithmeticType::kSub;
97     case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
98       return ops::ArithmeticType::kMul;
99     case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
100       return ops::ArithmeticType::kDiv;
101     default:
102       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
103   }
104 }
105
106 ops::ElementwiseActivationType
107 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
108 {
109   switch (type_ir)
110   {
111     case ir::operation::ElementwiseActivation::Type::LOGISTIC:
112       return ops::ElementwiseActivationType::kLogistic;
113     case ir::operation::ElementwiseActivation::Type::RELU:
114       return ops::ElementwiseActivationType::kReLU;
115     case ir::operation::ElementwiseActivation::Type::TANH:
116       return ops::ElementwiseActivationType::kTanh;
117     default:
118       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
119   }
120 }
121
122 ops::ElementwiseBinaryType
123 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
124 {
125   switch (type_ir)
126   {
127     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
128       return ops::ElementwiseBinaryType::kLogicalOr;
129     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
130       return ops::ElementwiseBinaryType::kMax;
131     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
132       return ops::ElementwiseBinaryType::kMin;
133     default:
134       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
135   }
136 }
137
138 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
139 {
140   switch (type_ir)
141   {
142     case ir::operation::ElementwiseUnary::Type::ABS:
143       return ops::ElementwiseUnaryType::kAbs;
144     case ir::operation::ElementwiseUnary::Type::CAST:
145       return ops::ElementwiseUnaryType::kCast;
146     case ir::operation::ElementwiseUnary::Type::COS:
147       return ops::ElementwiseUnaryType::kCos;
148     case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
149       return ops::ElementwiseUnaryType::kDequantize;
150     case ir::operation::ElementwiseUnary::Type::ERF:
151       return ops::ElementwiseUnaryType::kErf;
152     case ir::operation::ElementwiseUnary::Type::EXP:
153       return ops::ElementwiseUnaryType::kExp;
154     case ir::operation::ElementwiseUnary::Type::FLOOR:
155       return ops::ElementwiseUnaryType::kFloor;
156     case ir::operation::ElementwiseUnary::Type::LOG:
157       return ops::ElementwiseUnaryType::kLog;
158     case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
159       return ops::ElementwiseUnaryType::kLogicalNot;
160     case ir::operation::ElementwiseUnary::Type::NEG:
161       return ops::ElementwiseUnaryType::kNeg;
162     case ir::operation::ElementwiseUnary::Type::QUANTIZE:
163       return ops::ElementwiseUnaryType::kQuantize;
164     case ir::operation::ElementwiseUnary::Type::ROUND:
165       return ops::ElementwiseUnaryType::kRound;
166     case ir::operation::ElementwiseUnary::Type::RSQRT:
167       return ops::ElementwiseUnaryType::kRSqrt;
168     case ir::operation::ElementwiseUnary::Type::SIN:
169       return ops::ElementwiseUnaryType::kSin;
170     case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
171       return ops::ElementwiseUnaryType::kZerosLike;
172     default:
173       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
174   }
175 }
176
177 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
178 {
179   switch (type_ir)
180   {
181     case ir::operation::Pool2D::PoolType::AVG:
182       return ops::PoolType::kAvg;
183     case ir::operation::Pool2D::PoolType::MAX:
184       return ops::PoolType::kMax;
185     default:
186       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
187   }
188 }
189
190 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
191 {
192   switch (reduce_type_ir)
193   {
194     case ir::operation::Reduce::ReduceType::ALL:
195       return ops::ReduceType::kAll;
196     case ir::operation::Reduce::ReduceType::ANY:
197       return ops::ReduceType::kAny;
198     case ir::operation::Reduce::ReduceType::MAX:
199       return ops::ReduceType::kMax;
200     case ir::operation::Reduce::ReduceType::MIN:
201       return ops::ReduceType::kMin;
202     case ir::operation::Reduce::ReduceType::PROD:
203       return ops::ReduceType::kProd;
204     case ir::operation::Reduce::ReduceType::SUM:
205       return ops::ReduceType::kSum;
206     default:
207       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
208   }
209 }
210 } // namespace
211
212 KernelGenerator::KernelGenerator(
213     const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
214     const std::shared_ptr<TensorBuilder> &tensor_builder,
215     const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
216     const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
217     const std::shared_ptr<ExternalContext> &external_context)
218     : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
219       _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
220       _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
221 {
222   // DO NOTHING
223 }
224
225 void KernelGenerator::visit(const ir::operation::AddN &node)
226 {
227   const auto output_index{node.getOutputs().at(0)};
228
229   std::vector<const IPortableTensor *> input_tensors;
230   for (auto &input_idx : node.getInputs())
231     input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
232
233   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
234
235   auto fn = std::make_unique<ops::AddNLayer>();
236
237   fn->configure(std::move(input_tensors), output_tensor);
238
239   _return_fn = std::move(fn);
240 }
241
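// Visiting an OpSequence builds its FunctionSequence: a DynamicShapeInferer and the dynamic
// tensor manager are attached through DynamicTensorCtx so shapes can be re-inferred before
// execution when inputs are dynamic, then each operation in the sequence is visited and the
// kernel it produced is appended. The reference count of each native input/output tensor of
// the visited operations is increased here for tensor lifetime book-keeping, and portable
// tensors are asserted to be NHWC.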
242 void KernelGenerator::visit(const ir::OpSequence &op_seq)
243 {
244   assert(!_return_fn_seq);
245   assert(_tensor_builder->dynamicTensorManager());
246   assert(_tensor_reg);
247
248   auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
249
250   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
251
252   // Prepare to handle dynamic tensors later
253   auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
254   {
255     dyn_ctx->op_seq = &op_seq;
256     dyn_ctx->operations = &_operations_ctx;
257     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
258     dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
259
260     _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
261   }
262
263   _current_op_seq_layout = op_seq.getLayout();
264   for (const auto &operation_idx : op_seq.operations())
265   {
266     const auto &node = _operations_ctx.at(operation_idx);
267     node.accept(*this);
268     _return_fn_seq->append(releaseFunction());
269
270     for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
271     {
272       auto portable_tensor = _tensor_reg->getPortableTensor(ind);
273       if (portable_tensor)
274       {
275         assert(portable_tensor->layout() == ir::Layout::NHWC);
276       }
277
278       auto tensor = _tensor_reg->getNativeTensor(ind);
279       if (tensor)
280       {
281         tensor->increase_ref();
282       }
283     }
284   }
285 }
286
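// Conv2D has two configure paths. If the input or kernel shape is dynamic, the raw padding
// parameters are handed to the kernel and padding is resolved at run time once shapes are
// known. Otherwise padding is computed up front with ir::calculatePadding() from the static
// input/output feature shapes, kernel size, strides and dilation factors (as a rough sketch,
// SAME padding with stride 1 and no dilation works out to roughly (kernel_size - 1) / 2 per
// side, but the exact values come from ir::calculatePadding()).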
287 void KernelGenerator::visit(const ir::operation::Conv2D &node)
288 {
289   using ir::operation::Conv2D;
290
291   const auto ofm_index{node.getOutputs().at(0)};
292   const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
293   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
294   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
295
296   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
297   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
298   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
299   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
300
301   const auto stride = node.param().stride;
302   const auto activation = node.param().activation;
303   const auto param_padding = node.param().padding;
304   const auto dilation = node.param().dilation;
305   auto fn = std::make_unique<ops::ConvolutionLayer>();
306
307   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
308   {
309     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
310                   param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
311                   stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
312                   activation, ofm_tensor);
313
314     _return_fn = std::move(fn);
315     return;
316   }
317   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
318   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
319   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
320   const auto &ker_shape = _ctx.at(ker_index).shape();
321   const auto ker_height = ker_shape.dim(1);
322   const auto ker_width = ker_shape.dim(2);
323
324   const auto padding =
325       ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
326                            dilation.width_factor, dilation.height_factor);
327
328   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
329                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
330                 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
331
332   _return_fn = std::move(fn);
333 }
334
335 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
336 {
337   using ir::operation::DepthwiseConv2D;
338
339   const auto ofm_index{node.getOutputs().at(0)};
340   const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
341   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
342   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
343
344   const auto stride = node.param().stride;
345   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
346   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
347   // Kernel format is [1, kernel_height, kernel_width, depth_out].
348   const auto &ker_shape = _ctx.at(ker_index).shape();
349   const auto ker_height = ker_shape.dim(1);
350   const auto ker_width = ker_shape.dim(2);
351   const auto dilation_width = node.param().dilation.width_factor;
352   const auto dilation_height = node.param().dilation.height_factor;
353   const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
354                                             ker_width, ker_height, dilation_width, dilation_height);
355   const auto multiplier = node.param().multiplier;
356   const auto activation = node.param().activation;
357
358   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
359   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
360   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
361   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
362
363   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
364
365   fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
366                 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
367                 dilation_height, activation, ofm_tensor);
368
369   _return_fn = std::move(fn);
370 }
371
372 void KernelGenerator::visit(const ir::operation::Concat &node)
373 {
374   const auto ofm_index{node.getOutputs().at(0)};
375
376   const auto rank = _ctx.at(ofm_index).shape().rank();
377   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
378
379   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
380
381   std::vector<const IPortableTensor *> input_tensors;
382   for (auto &ifm_idx : node.getInputs())
383     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
384
385   auto fn = std::make_unique<ops::ConcatLayer>();
386
387   fn->configure(input_tensors, axis, output_tensor);
388
389   _return_fn = std::move(fn);
390 }
391
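// BatchToSpaceND takes an optional third input: the NN API form of the operation provides
// only INPUT and BLOCK_SIZE, in which case crops_alloc stays nullptr. When a CROPS_DATA
// input is present, its tensor is passed to the kernel as well.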
392 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
393 {
394   const auto output_index{node.getOutputs().at(0)};
395   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
396   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
397
398   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
399   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
400   auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
401
402   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
403
404   IPortableTensor *crops_alloc = nullptr;
405   const auto NNApiInputs = 2;
406
407   if (node.getInputs().size() != NNApiInputs)
408   {
409     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
410     crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
411   }
412
413   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
414
415   _return_fn = std::move(fn);
416 }
417
418 void KernelGenerator::visit(const ir::operation::Fill &node)
419 {
420   const auto output_index{node.getOutputs().at(0)};
421   const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
422   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
423
424   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
425   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
426   auto value_tensor = _tensor_reg->getPortableTensor(value_index);
427
428   auto fn = std::make_unique<ops::FillLayer>();
429
430   fn->configure(input_tensor, value_tensor, output_tensor);
431
432   _return_fn = std::move(fn);
433 }
434
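// FullyConnected treats the bias as optional: when the BIAS operand index is undefined, a
// nullptr is passed to the kernel. The weights_format parameter and the shared
// _external_context are forwarded to the layer as-is.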
435 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
436 {
437   using ir::operation::FullyConnected;
438
439   const auto output_index{node.getOutputs().at(0)};
440   const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
441   const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
442   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
443   const auto activation = node.param().activation;
444   const auto weights_format = node.param().weights_format;
445
446   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
447   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
448   auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
449   auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
450
451   auto fn = std::make_unique<ops::FullyConnectedLayer>();
452
453   fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
454                 _external_context);
455
456   _return_fn = std::move(fn);
457 }
458
459 void KernelGenerator::visit(const ir::operation::Reshape &node)
460 {
461   const auto output_index{node.getOutputs().at(0)};
462   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
463
464   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
465   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
466
467   // optional 2nd input
468   IPortableTensor *shape_tensor = nullptr;
469
470   if (node.getInputs().size() == 2)
471   {
472     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
473     shape_tensor = _tensor_reg->getPortableTensor(shape_index);
474   }
475
476   auto fn = std::make_unique<ops::ReshapeLayer>();
477
478   fn->configure(input_tensor, shape_tensor, output_tensor);
479   _return_fn = std::move(fn);
480 }
481
482 void KernelGenerator::visit(const ir::operation::Squeeze &node)
483 {
484   const auto output_index{node.getOutputs().at(0)};
485   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
486
487   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
488   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
489
490   // Squeeze can share same kernel with reshape
491   auto fn = std::make_unique<ops::ReshapeLayer>();
492
493   fn->configure(input_tensor, nullptr, output_tensor);
494
495   _return_fn = std::move(fn);
496 }
497
498 void KernelGenerator::visit(const ir::operation::Softmax &node)
499 {
500   const auto output_index{node.getOutputs().at(0)};
501   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
502
503   const auto beta = node.param().beta;
504
505   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
506   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
507
508   auto fn = std::make_unique<ops::SoftMaxLayer>();
509
510   fn->configure(input_tensor, beta, output_tensor);
511
512   _return_fn = std::move(fn);
513 }
514
515 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
516 {
517   const auto ofm_index{node.getOutputs().at(0)};
518   const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
519   const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
520
521   const auto activation = node.param().activation;
522
523   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
524   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
525   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
526
527   auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
528
529   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
530                 convertArithmeticType(node.param().arithmetic_type));
531
532   _return_fn = std::move(fn);
533 }
534
535 void KernelGenerator::visit(const ir::operation::Comparison &node)
536 {
537   const auto ofm_index{node.getOutputs().at(0)};
538   const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
539   const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
540
541   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
542   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
543   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
544
545   auto comparison_type = node.param().comparison_type;
546
547   auto fn = std::make_unique<ops::CompareLayer>();
548
549   fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
550
551   _return_fn = std::move(fn);
552 }
553
554 void KernelGenerator::visit(const ir::operation::Gather &node)
555 {
556   const auto output_index{node.getOutputs().at(0)};
557   const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
558   const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
559
560   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
561   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
562   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
563
564   const auto backend_layout = output_tensor->layout();
565   UNUSED_RELEASE(backend_layout);
566
567   // NOTE The frontend layout and backend layout must be the same for this operation.
568   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
569   //      is not efficient even if it works well. If so, it would be better to set the

570   //      layout of these backend tensors to the same layout.
571   //      There is also one thing we have to think about. This operation depends on the layout of
572   //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
573   //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
574   //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
575   assert(backend_layout == input_tensor->layout());
576   assert(backend_layout == indices_tensor->layout());
577   const auto &input_shape = _ctx.at(input_index).shape();
578   UNUSED_RELEASE(input_shape);
579   assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
580
581   const auto axis_raw = node.param().axis;
582   const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
583
584   auto fn = std::make_unique<ops::GatherLayer>();
585
586   fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
587
588   _return_fn = std::move(fn);
589 }
590
591 void KernelGenerator::visit(const ir::operation::OneHot &node)
592 {
593   const auto output_index{node.getOutputs().at(0)};
594   const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
595   const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
596   const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
597   const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
598
599   const auto axis = node.param().axis;
600
601   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
602   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
603   auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
604   auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
605   auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
606
607   assert(indices_tensor->data_type() == OperandType::INT32);
608   assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
609
610   auto fn = std::make_unique<ops::OneHotLayer>();
611
612   fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
613
614   _return_fn = std::move(fn);
615 }
616
617 void KernelGenerator::visit(const ir::operation::Einsum &node)
618 {
619   const auto ofm_index{node.getOutputs().at(0)};
620
621   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
622   std::vector<const IPortableTensor *> input_tensors;
623   for (auto &ifm_idx : node.getInputs())
624     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
625
626   const auto equation = node.param().equation;
627
628   auto fn = std::make_unique<ops::EinsumLayer>();
629
630   fn->configure(input_tensors, equation, output_tensor);
631
632   _return_fn = std::move(fn);
633 }
634
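// Custom operations are not generated here; the kernel comes from the injected
// IKernelBuilder. fill_op_info() collects the operand type info and portable tensors for the
// inputs and outputs, and the user data blob from the node is passed through unchanged.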
635 void KernelGenerator::visit(const ir::operation::Custom &node)
636 {
637   auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
638                           std::vector<custom::TypeInfo> &types,
639                           std::vector<IPortableTensor *> &tensors) {
640     for (auto &idx : opSeq)
641     {
642       const auto &operand = _ctx.at(idx);
643       // TODO make sure using `_current_op_seq_layout` is correct for custom operations
644       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
645       auto in_tensor = _tensor_reg->getPortableTensor(idx);
646       tensors.emplace_back(in_tensor);
647     }
648   };
649
650   backend::custom::CustomKernelConfigParams params{};
651
652   fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
653   fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
654
655   params.userdata = node.userdata().data;
656   params.userdata_size = node.userdata().size;
657
658   auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
659
660   _return_fn = std::move(fn);
661 }
662
663 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
664 {
665   const auto output_index{node.getOutputs().at(0)};
666   const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
667
668   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
669   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
670
671   auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
672
673   fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
674                 convertElementwiseActivationType(node.param().op_type));
675
676   _return_fn = std::move(fn);
677 }
678
679 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
680 {
681   const auto output_index{node.getOutputs().at(0)};
682   const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
683   const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
684
685   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
686   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
687   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
688
689   auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
690
691   fn->configure(lhs_tensor, rhs_tensor, output_tensor,
692                 convertElementwiseBinaryType(node.param().op_type));
693
694   _return_fn = std::move(fn);
695 }
696
697 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
698 {
699   const auto output_index{node.getOutputs().at(0)};
700   const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
701
702   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
703   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
704
705   auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
706
707   fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
708
709   _return_fn = std::move(fn);
710 }
711
712 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
713 {
714   const auto output_index{node.getOutputs().at(0)};
715   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
716   const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
717
718   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
719   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
720   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
721
722   auto fn = std::make_unique<ops::ExpandDimsLayer>();
723
724   fn->configure(input_tensor, axis_tensor, output_tensor);
725
726   _return_fn = std::move(fn);
727 }
728
729 void KernelGenerator::visit(const ir::operation::Pack &node)
730 {
731   const auto ofm_index{node.getOutputs().at(0)};
732
733   const auto rank = _ctx.at(ofm_index).shape().rank();
734   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
735
736   assert(-rank <= axis && axis < rank);
737
738   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
739
740   std::vector<const IPortableTensor *> input_tensors;
741   for (auto &ifm_idx : node.getInputs())
742     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
743
744   auto fn = std::make_unique<ops::PackLayer>();
745
746   fn->configure(input_tensors, axis, output_tensor);
747
748   _return_fn = std::move(fn);
749 }
750
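// Unpack normalizes a negative axis by adding the input rank before configuring the kernel,
// so the layer always receives a non-negative axis.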
751 void KernelGenerator::visit(const ir::operation::Unpack &node)
752 {
753   const auto input_index{node.getInputs().at(0)};
754
755   const auto rank = _ctx.at(input_index).shape().rank();
756   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
757
758   assert(rank == 0 || (-rank <= axis && axis < rank));
759
760   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
761
762   std::vector<IPortableTensor *> output_tensors;
763   for (auto &output_idx : node.getOutputs())
764     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
765
766   auto fn = std::make_unique<ops::UnpackLayer>();
767
768   uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
769
770   fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
771
772   _return_fn = std::move(fn);
773 }
774
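// Pad expects the PAD operand to be constant data (asserted below); its int32 values are read
// directly from the operand. A third input turns the operation into PadV2, whose constant
// fill value is likewise read from the operand data.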
775 void KernelGenerator::visit(const ir::operation::Pad &node)
776 {
777   const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
778   const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
779   const auto output_index{node.getOutputs().at(0)};
780   assert(_ctx.at(pad_index).data());
781
782   auto input = _tensor_reg->getPortableTensor(input_index);
783   auto output = _tensor_reg->getPortableTensor(output_index);
784   auto pad_rank = _ctx.at(pad_index).shape().dim(0);
785   auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
786
787   auto fn = std::make_unique<ops::PadLayer>();
788
789   bool isPadV2 = node.getInputs().size() == 3;
790   const void *value = nullptr;
791
792   if (isPadV2)
793   {
794     const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
795     value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
796   }
797
798   fn->configure(input, output, pad_base, pad_rank, value);
799   _return_fn = std::move(fn);
800 }
801
802 void KernelGenerator::visit(const ir::operation::Transpose &node)
803 {
804   const auto output_index{node.getOutputs().at(0)};
805   const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
806   const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
807
808   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
809   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
810   auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
811
812   auto fn = std::make_unique<ops::TransposeLayer>();
813
814   fn->configure(input_tensor, perm_tensor, output_tensor);
815
816   _return_fn = std::move(fn);
817 }
818
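// Reduce with ReduceType::MEAN is special-cased to MeanLayer; every other reduce type goes
// through the generic ReduceLayer with the type translated by convertReduceType().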
819 void KernelGenerator::visit(const ir::operation::Reduce &node)
820 {
821   const auto output_index{node.getOutputs().at(0)};
822   const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
823   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
824
825   const auto keep_dims = node.param().keep_dims;
826   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
827   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
828   auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
829
830   if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
831   {
832     auto fn = std::make_unique<ops::MeanLayer>();
833
834     fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
835
836     _return_fn = std::move(fn);
837   }
838   else
839   {
840     auto fn = std::make_unique<ops::ReduceLayer>();
841
842     const auto reduce_type = convertReduceType(node.param().reduce_type);
843     fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
844
845     _return_fn = std::move(fn);
846   }
847 }
848
849 void KernelGenerator::visit(const ir::operation::Select &node)
850 {
851   const auto output_index{node.getOutputs().at(0)};
852   const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
853   const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
854   const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
855
856   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
857   auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
858   auto true_tensor = _tensor_reg->getPortableTensor(true_index);
859   auto false_tensor = _tensor_reg->getPortableTensor(false_index);
860
861   auto fn = std::make_unique<ops::SelectLayer>();
862
863   fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
864
865   _return_fn = std::move(fn);
866 }
867
868 void KernelGenerator::visit(const ir::operation::Slice &node)
869 {
870   const auto output_index{node.getOutputs().at(0)};
871   const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
872   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
873   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
874
875   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
876   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
877   auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
878   auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
879
880   auto fn = std::make_unique<ops::SliceLayer>();
881
882   fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
883
884   _return_fn = std::move(fn);
885 }
886
887 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
888 {
889   const auto output_index{node.getOutputs().at(0)};
890   const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
891   const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
892   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
893   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
894
895   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
896   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
897   auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
898   auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
899   auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
900
901   auto begin_mask = node.param().begin_mask;
902   auto end_mask = node.param().end_mask;
903   auto shrink_axis_mask = node.param().shrink_axis_mask;
904
905   auto fn = std::make_unique<ops::StridedSliceLayer>();
906
907   fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
908                 end_mask, shrink_axis_mask);
909
910   _return_fn = std::move(fn);
911 }
912
913 void KernelGenerator::visit(const ir::operation::Split &node)
914 {
915   const auto num_splits = node.param().num_splits;
916   assert(num_splits == static_cast<int>(node.getOutputs().size()));
917
918   const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
919   const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
920
921   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
922   auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
923
924   std::vector<IPortableTensor *> out_tensors;
925   for (auto &output_idx : node.getOutputs())
926     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
927
928   auto fn = std::make_unique<ops::SplitLayer>();
929
930   fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
931
932   _return_fn = std::move(fn);
933 }
934
935 void KernelGenerator::visit(const ir::operation::Shape &node)
936 {
937   const auto ofm_index{node.getOutputs().at(0)};
938   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
939
940   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
941   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
942
943   auto fn = std::make_unique<ops::ShapeLayer>();
944
945   fn->configure(ifm_tensor, ofm_tensor);
946
947   _return_fn = std::move(fn);
948 }
949
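// ResizeBilinear is configured in one of three ways: with the output height/width taken from
// the node parameters when there is no SIZE input, with constant values read from a constant
// SIZE tensor at generation time, or with the SIZE tensor itself when it is not constant so
// the kernel reads it at run time.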
950 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
951 {
952   const auto output_index{node.getOutputs().at(0)};
953   const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
954
955   auto align_corners = node.param().align_corners;
956   auto half_pixel_centers = node.param().half_pixel_centers;
957
958   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
959   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
960
961   auto fn = std::make_unique<ops::ResizeBilinearLayer>();
962
963   if (node.getInputs().size() == 1)
964   {
965     fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
966                   align_corners, half_pixel_centers);
967   }
968   else
969   {
970     assert(node.getInputs().size() == 2);
971     const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
972     auto size_tensor = _tensor_reg->getPortableTensor(size_index);
973     if (size_tensor->is_constant())
974     {
975       auto size_vec = _ctx.at(size_index).asVector<int32_t>();
976       const auto height_out = size_vec[0];
977       const auto width_out = size_vec[1];
978       fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
979                     half_pixel_centers);
980     }
981     else
982     {
983       fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
984     }
985   }
986
987   _return_fn = std::move(fn);
988 }
989
990 void KernelGenerator::visit(const ir::operation::Reverse &node)
991 {
992   const auto output_index{node.getOutputs().at(0)};
993   const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
994   const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
995
996   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
997   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
998   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
999
1000   auto fn = std::make_unique<ops::ReverseLayer>();
1001
1002   fn->configure(input_tensor, axis_tensor, output_tensor);
1003
1004   _return_fn = std::move(fn);
1005 }
1006
1007 void KernelGenerator::visit(const ir::operation::ArgMax &node)
1008 {
1009   const auto output_index{node.getOutputs().at(0)};
1010   const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
1011   const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
1012
1013   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1014   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1015   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1016
1017   auto fn = std::make_unique<ops::ArgMinMaxLayer>();
1018
1019   fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
1020
1021   _return_fn = std::move(fn);
1022 }
1023
1024 void KernelGenerator::visit(const ir::operation::Pool2D &node)
1025 {
1026   const auto ofm_index{node.getOutputs().at(0)};
1027   const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
1028
1029   const auto kh = node.param().kh;
1030   const auto kw = node.param().kw;
1031   const auto stride = node.param().stride;
1032   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
1033   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
1034   const auto padding =
1035       ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
1036   const auto activation = node.param().activation;
1037
1038   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1039   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1040
1041   auto fn = std::make_unique<ops::PoolLayer>();
1042
1043   fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1044                 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1045                 convertPoolType(node.param().op_type));
1046
1047   _return_fn = std::move(fn);
1048 }
1049
1050 void KernelGenerator::visit(const ir::operation::Pow &node)
1051 {
1052   const auto output_index{node.getOutputs().at(0)};
1053   const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1054   const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1055
1056   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1057   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1058   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1059
1060   auto fn = std::make_unique<ops::PowLayer>();
1061
1062   fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1063
1064   _return_fn = std::move(fn);
1065 }
1066
1067 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1068 {
1069   const auto output_index{node.getOutputs().at(0)};
1070   const auto input_index{node.getInputs().at(0)};
1071
1072   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1073   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
1074
1075   auto fn = std::make_unique<ops::L2NormLayer>();
1076
1077   fn->configure(input_alloc, output_alloc);
1078
1079   _return_fn = std::move(fn);
1080 }
1081
1082 void KernelGenerator::visit(const ir::operation::Range &node)
1083 {
1084   const auto output_index{node.getOutputs().at(0)};
1085   const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1086   const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1087   const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1088
1089   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1090   auto start_tensor = _tensor_reg->getPortableTensor(start_index);
1091   auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
1092   auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
1093
1094   auto fn = std::make_unique<ops::RangeLayer>();
1095
1096   fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1097   _return_fn = std::move(fn);
1098 }
1099
1100 void KernelGenerator::visit(const ir::operation::Rank &node)
1101 {
1102   const auto ofm_index{node.getOutputs().at(0)};
1103   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1104
1105   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1106   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1107
1108   auto fn = std::make_unique<ops::RankLayer>();
1109
1110   fn->configure(ifm_tensor, ofm_tensor);
1111
1112   _return_fn = std::move(fn);
1113 }
1114
1115 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1116 {
1117   const auto ofm_index{node.getOutputs().at(0)};
1118   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1119   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1120
1121   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1122   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1123   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1124
1125   auto fn = std::make_unique<ops::SqDiffLayer>();
1126
1127   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1128   _return_fn = std::move(fn);
1129 }
1130
1131 void KernelGenerator::visit(const ir::operation::Tile &node)
1132 {
1133   const auto output_index{node.getOutputs().at(0)};
1134   const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1135   const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1136
1137   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1138   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1139   auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
1140
1141   auto fn = std::make_unique<ops::TileLayer>();
1142
1143   fn->configure(input_tensor, multiples_tensor, output_tensor);
1144   _return_fn = std::move(fn);
1145 }
1146
1147 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1148 {
1149   const auto output_index{node.getOutputs().at(0)};
1150   const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1151   const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1152   const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1153
1154   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1155   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1156   auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
1157   auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
1158
1159   auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1160
1161   fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1162   _return_fn = std::move(fn);
1163 }
1164
1165 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1166 {
1167   const auto output_index{node.getOutputs().at(0)};
1168   const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1169   const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1170
1171   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1172   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1173   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1174
1175   const auto adj_x = node.param().adj_x;
1176   const auto adj_y = node.param().adj_y;
1177
1178   auto fn = std::make_unique<ops::BatchMatMulLayer>();
1179
1180   fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1181   _return_fn = std::move(fn);
1182 }
1183
1184 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1185 {
1186   const auto output_index{node.getOutputs().at(0)};
1187   const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1188   const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1189
1190   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1191   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1192   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1193
1194   auto fn = std::make_unique<ops::BroadcastToLayer>();
1195
1196   fn->configure(input_tensor, shape_tensor, output_tensor);
1197
1198   _return_fn = std::move(fn);
1199 }
1200
1201 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1202 {
1203   const auto ofm_index{node.getOutputs().at(0)};
1204
1205   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
1206   std::vector<const IPortableTensor *> input_tensors;
1207   for (auto &ifm_idx : node.getInputs())
1208     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
1209
1210   const auto epsilon = node.param().epsilon;
1211   const auto is_training = node.param().is_training;
1212   const auto data_format = node.param().data_format;
1213
1214   auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1215
1216   fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1217
1218   _return_fn = std::move(fn);
1219 }
1220
1221 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1222 {
1223   const auto output_index{node.getOutputs().at(0)};
1224   const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1225
1226   const auto beta = node.param().beta;
1227   const auto axis = node.param().axis;
1228
1229   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1230   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1231
1232   auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1233
1234   fn->configure(input_tensor, beta, axis, output_tensor);
1235
1236   _return_fn = std::move(fn);
1237 }
1238
1239 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1240 {
1241   const auto output_index{node.getOutputs().at(0)};
1242   const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1243   const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1244   const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1245
1246   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1247   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1248   auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
1249   auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
1250
1251   auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1252
1253   fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1254
1255   _return_fn = std::move(fn);
1256 }
1257
1258 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1259 {
1260   const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1261   const auto output_index{node.getOutputs().at(0)};
1262   auto block_size = node.param().block_size;
1263
1264   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1265   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1266
1267   auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1268
1269   fn->configure(input_tensor, block_size, output_tensor);
1270   _return_fn = std::move(fn);
1271 }
1272
1273 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1274 {
1275   const auto output_index{node.getOutputs().at(0)};
1276   const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1277   const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1278
1279   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1280   auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
1281   auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);
1282
1283   auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1284
1285   fn->configure(shape_alloc, seed_alloc, output_alloc);
1286   _return_fn = std::move(fn);
1287 }
1288
1289 void KernelGenerator::visit(const ir::operation::SplitV &node)
1290 {
1291   const auto num_splits = node.param().num_splits;
1292   assert(num_splits == static_cast<int>(node.getOutputs().size()));
1293
1294   const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1295   const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1296   const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1297
1298   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
1299   auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
1300   auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
1301
1302   std::vector<IPortableTensor *> out_tensors;
1303   for (auto &output_idx : node.getOutputs())
1304     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
1305
1306   auto fn = std::make_unique<ops::SplitVLayer>();
1307
1308   fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1309
1310   _return_fn = std::move(fn);
1311 }
1312
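// LSTM has many optional operands. Whether the graph uses CIFG, peephole connections or a
// projection layer is detected below by checking that the corresponding weight operands exist
// and have non-zero dimensions; optional output tensors such as the scratch buffer are looked
// up only when they exist and are otherwise passed as nullptr.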
1313 void KernelGenerator::visit(const ir::operation::LSTM &node)
1314 {
1315   const auto scratch_buffer_index{
1316       node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
1317   const auto output_state_out_index{
1318       node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
1319   const auto cell_state_out_index{
1320       node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
1321   const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
1322
1323   const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
1324   const auto input_to_input_weights_index{
1325       node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
1326   const auto input_to_forget_weights_index{
1327       node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
1328   const auto input_to_cell_weights_index{
1329       node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
1330   const auto input_to_output_weights_index{
1331       node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
1332   const auto recurrent_to_input_weights_index{
1333       node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
1334   const auto recurrent_to_forget_weights_index{
1335       node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
1336   const auto recurrent_to_cell_weights_index{
1337       node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
1338   const auto recurrent_to_output_weights_index{
1339       node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
1340   const auto cell_to_input_weights_index{
1341       node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
1342   const auto cell_to_forget_weights_index{
1343       node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
1344   const auto cell_to_output_weights_index{
1345       node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
1346   const auto input_gate_bias_index{
1347       node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
1348   const auto forget_gate_bias_index{
1349       node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
1350   const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
1351   const auto output_gate_bias_index{
1352       node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
1353   const auto projection_weights_index{
1354       node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
1355   const auto projection_bias_index{
1356       node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
1357   const auto output_state_in_index{
1358       node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
1359   const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
1360   const auto time_major = node.param().time_major;
1361
1362   // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1363   // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
1364   // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1365   // NOTE The cell_to_input_weights does not exist in a non-peephole LSTM, even for a regular (non-CIFG) model.
1366   bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1367                                     (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1368                                      _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1369   bool has_recurrent_to_input_weights =
1370       _ctx.exist(recurrent_to_input_weights_index) &&
1371       (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1372        _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1373
1374   // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in a peephole LSTM.
1375   // But the cell_to_input_weights does not exist in a CIFG LSTM, even when peephole is used.
1376   // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1377   // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1378   bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1379                                     _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1380   bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1381                                     _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1382
1383   bool has_input_gate_bias =
1384       _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
1385
1386   bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1387                                 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1388                                  _ctx.at(projection_weights_index).shape().dim(1) != 0);
1389   bool has_projection_bias =
1390       _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
1391
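       // The scratch buffer and the state outputs are optional; fetch them only if the model defines them.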
1392   auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1393                                    ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1394                                    : nullptr; // optional
1395   auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1396                                      ? _tensor_reg->getPortableTensor(output_state_out_index)
1397                                      : nullptr; // optional
1398   auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1399                                    ? _tensor_reg->getPortableTensor(cell_state_out_index)
1400                                    : nullptr; // optional
1401   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1402
1403   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1404
1405   auto input_to_input_weights_tensor =
1406       has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1407                                  : nullptr; // optional
1408   auto input_to_forget_weights_tensor =
1409       _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1410   auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1411   auto input_to_output_weights_tensor =
1412       _tensor_reg->getPortableTensor(input_to_output_weights_index);
1413   auto recurrent_to_input_weights_tensor =
1414       has_recurrent_to_input_weights
1415           ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1416           : nullptr; // optional
1417   auto recurrent_to_forget_weights_tensor =
1418       _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1419   auto recurrent_to_cell_weights_tensor =
1420       _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1421   auto recurrent_to_output_weights_tensor =
1422       _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1423
1424   auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1425   auto cell_to_forget_weights_tensor =
1426       has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1427                                  : nullptr; // optional
1428   auto cell_to_output_weights_tensor =
1429       has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1430                                  : nullptr; // optional
1431
1432   auto input_gate_bias_tensor =
1433       has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1434   auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1435   auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1436   auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1437   auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1438   auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1439
1440   auto projection_weights_tensor = has_projection_weights
1441                                        ? _tensor_reg->getPortableTensor(projection_weights_index)
1442                                        : nullptr; // optional
1443   auto projection_bias_tensor = has_projection_bias
1444                                     ? _tensor_reg->getPortableTensor(projection_bias_index)
1445                                     : nullptr; // optional
1446
1447   IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1448   IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1449   IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1450   IPortableTensor *output_layer_norm_weights_tensor = nullptr;
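       // An LSTM node with 24 inputs additionally carries the four layer normalization weight tensors.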
1451   if (node.getInputs().size() == 24)
1452   {
1453     const auto input_layer_norm_weights_index{
1454         node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1455     const auto forget_layer_norm_weights_index{
1456         node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1457     const auto cell_layer_norm_weights_index{
1458         node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1459     const auto output_layer_norm_weights_index{
1460         node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1461
1462     input_layer_norm_weights_tensor =
1463         _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1464     forget_layer_norm_weights_tensor =
1465         _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1466     cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1467     output_layer_norm_weights_tensor =
1468         _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1469   }
1470
1471   auto fn = std::make_unique<ops::LSTMLayer>();
1472
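       // Auxiliary (aux_*) inputs are not used here, so nullptr is passed for them.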
1473   fn->configure(
1474       input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1475       input_to_cell_weights_tensor, input_to_output_weights_tensor,
1476       recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
1477       recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
1478       cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
1479       input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
1480       cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
1481       /*aux_input=*/nullptr,
1482       /*aux_input_to_input_weights=*/nullptr,
1483       /*aux_input_to_forget_weights=*/nullptr,
1484       /*aux_input_to_cell_weights=*/nullptr,
1485       /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1486       cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1487       projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1488       /*forward_sequence=*/true, time_major,
1489       /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1490       output_tensor,
1491       !_ctx.at(output_state_in_index).info().isVariable() /* non-variable means empty buffer on frontend for now */,
1492       !_ctx.at(cell_state_in_index).info().isVariable());
1493
1494   _return_fn = std::move(fn);
1495 }
1496
1497 } // namespace cpu
1498 } // namespace backend
1499 } // namespace onert