/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "KernelGenerator.h"

#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
#include "ops/ElementwiseBinaryLayer.h"
#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
#include "ops/DetectionPostProcessLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/QuantizeLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
#include <backend/IConfig.h>
#include <memory>
#include <util/Utils.h>
#include <util/logging.h>
#include <exec/DynamicShapeInferer.h>

#include <stdexcept>
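
// The helpers in the anonymous namespace below map IR-level enum values to their cpu-kernel
// counterparts; any value without a mapping throws std::runtime_error.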
namespace onert
{
namespace backend
{
namespace cpu
{

namespace
{
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseActivation::Type::ELU:
      return ops::ElementwiseActivationType::kElu;
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return ops::ElementwiseActivationType::kLogistic;
    case ir::operation::ElementwiseActivation::Type::RELU:
      return ops::ElementwiseActivationType::kReLU;
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ops::ElementwiseActivationType::kTanh;
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ops::ElementwiseActivationType::kLeakyReLU;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
      return ops::ElementwiseBinaryType::kFloorDiv;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
      return ops::ElementwiseBinaryType::kLogicalAnd;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
      return ops::ElementwiseBinaryType::kLogicalOr;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
      return ops::ElementwiseBinaryType::kMax;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
      return ops::ElementwiseBinaryType::kMin;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseUnary::Type::ABS:
      return ops::ElementwiseUnaryType::kAbs;
    case ir::operation::ElementwiseUnary::Type::CAST:
      return ops::ElementwiseUnaryType::kCast;
    case ir::operation::ElementwiseUnary::Type::COS:
      return ops::ElementwiseUnaryType::kCos;
    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
      return ops::ElementwiseUnaryType::kDequantize;
    case ir::operation::ElementwiseUnary::Type::ERF:
      return ops::ElementwiseUnaryType::kErf;
    case ir::operation::ElementwiseUnary::Type::EXP:
      return ops::ElementwiseUnaryType::kExp;
    case ir::operation::ElementwiseUnary::Type::FLOOR:
      return ops::ElementwiseUnaryType::kFloor;
    case ir::operation::ElementwiseUnary::Type::LOG:
      return ops::ElementwiseUnaryType::kLog;
    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
      return ops::ElementwiseUnaryType::kLogicalNot;
    case ir::operation::ElementwiseUnary::Type::NEG:
      return ops::ElementwiseUnaryType::kNeg;
    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
      return ops::ElementwiseUnaryType::kQuantize;
    case ir::operation::ElementwiseUnary::Type::ROUND:
      return ops::ElementwiseUnaryType::kRound;
    case ir::operation::ElementwiseUnary::Type::RSQRT:
      return ops::ElementwiseUnaryType::kRSqrt;
    case ir::operation::ElementwiseUnary::Type::SIN:
      return ops::ElementwiseUnaryType::kSin;
    case ir::operation::ElementwiseUnary::Type::SQRT:
      return ops::ElementwiseUnaryType::kSqrt;
    case ir::operation::ElementwiseUnary::Type::SQUARE:
      return ops::ElementwiseUnaryType::kSquare;
    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
      return ops::ElementwiseUnaryType::kZerosLike;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::ALL:
      return ops::ReduceType::kAll;
    case ir::operation::Reduce::ReduceType::ANY:
      return ops::ReduceType::kAny;
    case ir::operation::Reduce::ReduceType::MAX:
      return ops::ReduceType::kMax;
    case ir::operation::Reduce::ReduceType::MIN:
      return ops::ReduceType::kMin;
    case ir::operation::Reduce::ReduceType::PROD:
      return ops::ReduceType::kProd;
    case ir::operation::Reduce::ReduceType::SUM:
      return ops::ReduceType::kSum;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace
KernelGenerator::KernelGenerator(
  const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
  const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
  const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
  const std::shared_ptr<ExternalContext> &external_context)
  : basic::KernelGeneratorBase{graph},
    _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
    _external_context(external_context)
{
  // DO NOTHING
}
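
// generate() builds the FunctionSequence for a single operation: it attaches a context for
// run-time dynamic-shape inference, dispatches to the matching visit() below (which leaves the
// generated kernel in _return_fn), and raises the reference count of every tensor the operation
// touches so the backing buffers stay alive while this kernel can run.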
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
  auto ret = std::make_unique<exec::FunctionSequence>();

  assert(_tensor_builder->dynamicTensorManager());

  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op_ind = ind;
    dyn_ctx->operations = &_operations_ctx;
    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
  }
  ret->dynamic_tensor_ctx(dyn_ctx);

  auto &op = _graph.operations().at(ind);
  op.accept(*this);
  assert(_return_fn); // _return_fn must have been generated
  ret->append(std::move(_return_fn));

  for (auto ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
  {
    auto portable_tensor = _tensor_reg->getPortableTensor(ind);
    if (portable_tensor)
    {
      assert(portable_tensor->layout() == ir::Layout::NHWC);
    }

    auto tensor = _tensor_reg->getNativeTensor(ind);
    if (tensor)
    {
      tensor->increase_ref();
    }
  }
  return ret;
}
void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}
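
// For Conv2D, explicit padding can only be precomputed when the input and kernel shapes are
// static; with dynamic shapes the layer receives the raw padding parameters and resolves the
// explicit padding values at run time.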
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto param_padding = node.param().padding;
  const auto dilation = node.param().dilation;
  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                         dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}
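
// BatchToSpaceND: the CROPS input is optional. The NNAPI form of this operation carries only
// two inputs (input and block size), in which case a null crops tensor is passed to the layer.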
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}
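
// Reshape: the SHAPE input is optional. When it is absent, the layer gets a null shape tensor
// and the output shape is taken from shape inference instead.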
void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share the same kernel with Reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto backend_layout = output_tensor->layout();
  UNUSED_RELEASE(backend_layout);

  // NOTE The frontend layout and backend layout must be the same for this operation.
  //      If they differed, a stage would have to be added to permute the output tensor,
  //      which would not be efficient even if it worked. In that case it would be better
  //      to give these backend tensors the same layout.
  //      There is one more thing to consider: this operation depends on the layout of the
  //      model. For example, if an NHWC model has this operation with output rank == 4,
  //      indices rank == 2 and axis == 2, the operation should work on the W and C axes,
  //      but W and C are not adjacent in NCHW, so an NCHW backend cannot handle this case.
  assert(backend_layout == input_tensor->layout());
  assert(backend_layout == indices_tensor->layout());
  const auto &input_shape = _ctx.at(input_index).shape();
  UNUSED_RELEASE(input_shape);
  assert(input_shape.rank() < 4 || _current_layout == backend_layout);

  const auto axis_raw = node.param().axis;
  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Einsum &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto equation = node.param().equation;

  auto fn = std::make_unique<ops::EinsumLayer>();

  fn->configure(input_tensors, equation, output_tensor);

  _return_fn = std::move(fn);
}
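
// Custom operations are not lowered to a built-in layer. Operand shapes/types and the raw user
// data blob are packed into CustomKernelConfigParams and handed to the injected kernel builder,
// which constructs the kernel registered under the operation's id.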
void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      // TODO make sure using `_current_layout` is correct for custom operations
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };

  backend::custom::CustomKernelConfigParams params{};

  fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
  fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);

  params.userdata = node.userdata().data;
  params.userdata_size = node.userdata().size;

  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();

  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
                convertElementwiseActivationType(node.param().op_type));

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();

  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
                convertElementwiseBinaryType(node.param().op_type));

  _return_fn = std::move(fn);
}
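
// ElementwiseUnary: QUANTIZE is routed to a dedicated QuantizeLayer; every other unary type is
// handled by the generic ElementwiseUnaryLayer, parameterized via convertElementwiseUnaryType.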
void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
  {
    auto fn = std::make_unique<ops::QuantizeLayer>();
    fn->configure(input_tensor, output_tensor);
    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
    fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
    _return_fn = std::move(fn);
  }
}
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
  // AXIS input is used for output shape inference

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ExpandDimsLayer>();

  fn->configure(input_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  assert(-rank <= axis && axis < rank);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::PackLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(0)};

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  assert(rank == 0 || (-rank <= axis && axis < rank));

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  std::vector<IPortableTensor *> output_tensors;
  for (auto &output_idx : node.getOutputs())
    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::UnpackLayer>();

  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}
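
// Pad handles both PAD (two inputs) and PADV2, which adds a third VALUE input holding the
// constant to pad with; without VALUE, value stays null and the layer falls back to its
// default padding value.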
void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  assert(_ctx.at(pad_index).data());

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto output = _tensor_reg->getPortableTensor(output_index);
  auto pad_rank = _ctx.at(pad_index).shape().dim(0);
  auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());

  auto fn = std::make_unique<ops::PadLayer>();

  bool isPadV2 = node.getInputs().size() == 3;
  const void *value = nullptr;

  if (isPadV2)
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
  }

  fn->configure(input, output, pad_base, pad_rank, value);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}
void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}
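
// ResizeBilinear is configured in one of three ways: output size from the operation parameters
// (single-input form), from a constant SIZE tensor read here at generation time, or from a
// non-constant SIZE tensor resolved at run time.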
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  const auto padding =
    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Rank::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
  const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
  const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);

  auto fn = std::make_unique<ops::MatrixBandPartLayer>();

  fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
{
  using NMS = ir::operation::DetectionPostProcess;

  ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
  parameters.scales.y = node.param().scale.y_scale;
  parameters.scales.x = node.param().scale.x_scale;
  parameters.scales.w = node.param().scale.w_scale;
  parameters.scales.h = node.param().scale.h_scale;

  parameters.iou_threshold = node.param().iou_threshold;
  parameters.score_threshold = node.param().score_threshold;
  parameters.max_boxes_per_class = node.param().max_boxes_per_class;
  parameters.max_detections = node.param().max_detections;
  parameters.num_classes = node.param().num_classes;
  parameters.center_box_format = node.param().center_size_boxes;
  parameters.max_classes_per_detection = node.param().max_classes_per_detection;

  auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
  auto scores_index = node.getInputs().at(NMS::Input::SCORES);
  auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);

  auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
  auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
  auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
  auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);

  parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
  parameters.scrores_descr = _ctx.at(scores_index).shape().dims(); // (sic) field name

  parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
  parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
  parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);

  parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
  parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
  parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
  parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);

  auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
  fn->configure(std::move(parameters));

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}
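
// LSTM: the presence of optional inputs encodes the variant, as the NOTEs below describe.
// Input-to-input and recurrent-to-input weights distinguish non-CIFG from CIFG, cell-to-*
// weights enable peephole connections, projection weights/bias enable projection, and a
// 24-input node additionally carries the four layer-normalization weight tensors.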
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  const auto scratch_buffer_index{
    node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
    node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
    node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
    node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
    node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
  // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
  // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
  bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
                                    (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
  bool has_recurrent_to_input_weights =
    _ctx.exist(recurrent_to_input_weights_index) &&
    (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
     _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);

  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
  // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
  // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
  bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
                                    _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
                                    _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;

  bool has_input_gate_bias =
    _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);

  bool has_projection_weights = _ctx.exist(projection_weights_index) &&
                                (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
                                 _ctx.at(projection_weights_index).shape().dim(1) != 0);
  bool has_projection_bias =
    _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);

  auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
                                 ? _tensor_reg->getPortableTensor(scratch_buffer_index)
                                 : nullptr; // optional
  auto output_state_out_tensor = _ctx.exist(output_state_out_index)
                                   ? _tensor_reg->getPortableTensor(output_state_out_index)
                                   : nullptr; // optional
  auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
                                 ? _tensor_reg->getPortableTensor(cell_state_out_index)
                                 : nullptr; // optional
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto input_to_input_weights_tensor =
    has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
                               : nullptr; // optional
  auto input_to_forget_weights_tensor =
    _tensor_reg->getPortableTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor =
    _tensor_reg->getPortableTensor(input_to_output_weights_index);
  auto recurrent_to_input_weights_tensor =
    has_recurrent_to_input_weights
      ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
      : nullptr; // optional
  auto recurrent_to_forget_weights_tensor =
    _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor =
    _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
    _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);

  auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
  auto cell_to_forget_weights_tensor =
    has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
                               : nullptr; // optional
  auto cell_to_output_weights_tensor =
    has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
                               : nullptr; // optional

  auto input_gate_bias_tensor =
    has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
  auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
  auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
  auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
  auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
  auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);

  auto projection_weights_tensor = has_projection_weights
                                     ? _tensor_reg->getPortableTensor(projection_weights_index)
                                     : nullptr; // optional
  auto projection_bias_tensor = has_projection_bias
                                  ? _tensor_reg->getPortableTensor(projection_bias_index)
                                  : nullptr; // optional

  IPortableTensor *input_layer_norm_weights_tensor = nullptr;
  IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
  IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
  IPortableTensor *output_layer_norm_weights_tensor = nullptr;
  if (node.getInputs().size() == 24)
  {
    const auto input_layer_norm_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
    const auto forget_layer_norm_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
    const auto cell_layer_norm_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
    const auto output_layer_norm_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};

    input_layer_norm_weights_tensor =
      _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
    forget_layer_norm_weights_tensor =
      _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
    cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
    output_layer_norm_weights_tensor =
      _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
  }

  auto fn = std::make_unique<ops::LSTMLayer>();

  fn->configure(
    input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
    input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
    recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
    recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
    cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
    forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
    output_layer_norm_weights_tensor,
    /*aux_input=*/nullptr,
    /*aux_input_to_input_weights=*/nullptr,
    /*aux_input_to_forget_weights=*/nullptr,
    /*aux_input_to_cell_weights=*/nullptr,
    /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
    cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
    projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
    /*forward_sequence=*/true, time_major,
    /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
    output_tensor,
    !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
    !_ctx.at(cell_state_in_index).info().isVariable());

  _return_fn = std::move(fn);
}
} // namespace cpu
} // namespace backend
} // namespace onert