Imported Upstream version 1.25.0
[platform/core/ml/nnfw.git] / runtime / onert / backend / cpu / KernelGenerator.cc
1 /*
2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "KernelGenerator.h"
18
19 #include "ops/AddNLayer.h"
20 #include "ops/ArgMinMaxLayer.h"
21 #include "ops/BatchToSpaceNDLayer.h"
22 #include "ops/BinaryArithmeticLayer.h"
23 #include "ops/CompareLayer.h"
24 #include "ops/ConcatLayer.h"
25 #include "ops/ConvolutionLayer.h"
26 #include "ops/DepthToSpaceLayer.h"
27 #include "ops/DepthwiseConvolutionLayer.h"
28 #include "ops/EinsumLayer.h"
29 #include "ops/ElementwiseActivationLayer.h"
30 #include "ops/ElementwiseBinaryLayer.h"
31 #include "ops/ElementwiseUnaryLayer.h"
32 #include "ops/ExpandDimsLayer.h"
33 #include "ops/FillLayer.h"
34 #include "ops/FullyConnectedLayer.h"
35 #include "ops/GatherLayer.h"
36 #include "ops/LSTMLayer.h"
37 #include "ops/MeanLayer.h"
38 #include "ops/DetectionPostProcessLayer.h"
39 #include "ops/OneHotLayer.h"
40 #include "ops/OperationUtils.h"
41 #include "ops/PackLayer.h"
42 #include "ops/PadLayer.h"
43 #include "ops/PoolLayer.h"
44 #include "ops/PowLayer.h"
45 #include "ops/QuantizeLayer.h"
46 #include "ops/RangeLayer.h"
47 #include "ops/RankLayer.h"
48 #include "ops/ReduceLayer.h"
49 #include "ops/ReshapeLayer.h"
50 #include "ops/ResizeBilinearLayer.h"
51 #include "ops/ReverseLayer.h"
52 #include "ops/SelectLayer.h"
53 #include "ops/ShapeLayer.h"
54 #include "ops/SliceLayer.h"
55 #include "ops/SoftMaxLayer.h"
56 #include "ops/StridedSliceLayer.h"
57 #include "ops/SpaceToBatchNDLayer.h"
58 #include "ops/SpaceToDepthLayer.h"
59 #include "ops/SplitLayer.h"
60 #include "ops/SplitVLayer.h"
61 #include "ops/TileLayer.h"
62 #include "ops/TransposeLayer.h"
63 #include "ops/UnpackLayer.h"
64 #include "ops/SquaredDiffLayer.h"
65 #include "ops/L2NormLayer.h"
66 #include "ops/MatrixBandPartLayer.h"
67 #include "ops/BatchMatMulLayer.h"
68 #include "ops/BroadcastToLayer.h"
69 #include "ops/FusedBatchNormLayer.h"
70 #include "ops/LogSoftMaxLayer.h"
71 #include "ops/StatelessRandomUniformLayer.h"
72
73 #include <backend/Backend.h>
74 #include <backend/IConfig.h>
75 #include <memory>
76 #include <util/Utils.h>
77 #include <util/logging.h>
78 #include <exec/DynamicShapeInferer.h>
79
80 #include <stdexcept>
81
82 namespace onert
83 {
84 namespace backend
85 {
86 namespace cpu
87 {
88
89 namespace
90 {
91 ops::ArithmeticType
92 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
93 {
94   switch (arithmetic_type_ir)
95   {
96     case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
97       return ops::ArithmeticType::kAdd;
98     case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
99       return ops::ArithmeticType::kSub;
100     case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
101       return ops::ArithmeticType::kMul;
102     case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
103       return ops::ArithmeticType::kDiv;
104     default:
105       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
106   }
107 }
108
109 ops::ElementwiseActivationType
110 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
111 {
112   switch (type_ir)
113   {
114     case ir::operation::ElementwiseActivation::Type::ELU:
115       return ops::ElementwiseActivationType::kElu;
116     case ir::operation::ElementwiseActivation::Type::LOGISTIC:
117       return ops::ElementwiseActivationType::kLogistic;
118     case ir::operation::ElementwiseActivation::Type::RELU:
119       return ops::ElementwiseActivationType::kReLU;
120     case ir::operation::ElementwiseActivation::Type::TANH:
121       return ops::ElementwiseActivationType::kTanh;
122     case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
123       return ops::ElementwiseActivationType::kLeakyReLU;
124     default:
125       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
126   }
127 }
128
129 ops::ElementwiseBinaryType
130 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
131 {
132   switch (type_ir)
133   {
134     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::FLOOR_DIV:
135       return ops::ElementwiseBinaryType::kFloorDiv;
136     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
137       return ops::ElementwiseBinaryType::kLogicalAnd;
138     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
139       return ops::ElementwiseBinaryType::kLogicalOr;
140     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
141       return ops::ElementwiseBinaryType::kMax;
142     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
143       return ops::ElementwiseBinaryType::kMin;
144     default:
145       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
146   }
147 }
148
149 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
150 {
151   switch (type_ir)
152   {
153     case ir::operation::ElementwiseUnary::Type::ABS:
154       return ops::ElementwiseUnaryType::kAbs;
155     case ir::operation::ElementwiseUnary::Type::CAST:
156       return ops::ElementwiseUnaryType::kCast;
157     case ir::operation::ElementwiseUnary::Type::COS:
158       return ops::ElementwiseUnaryType::kCos;
159     case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
160       return ops::ElementwiseUnaryType::kDequantize;
161     case ir::operation::ElementwiseUnary::Type::ERF:
162       return ops::ElementwiseUnaryType::kErf;
163     case ir::operation::ElementwiseUnary::Type::EXP:
164       return ops::ElementwiseUnaryType::kExp;
165     case ir::operation::ElementwiseUnary::Type::FLOOR:
166       return ops::ElementwiseUnaryType::kFloor;
167     case ir::operation::ElementwiseUnary::Type::LOG:
168       return ops::ElementwiseUnaryType::kLog;
169     case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
170       return ops::ElementwiseUnaryType::kLogicalNot;
171     case ir::operation::ElementwiseUnary::Type::NEG:
172       return ops::ElementwiseUnaryType::kNeg;
173     case ir::operation::ElementwiseUnary::Type::QUANTIZE:
174       return ops::ElementwiseUnaryType::kQuantize;
175     case ir::operation::ElementwiseUnary::Type::ROUND:
176       return ops::ElementwiseUnaryType::kRound;
177     case ir::operation::ElementwiseUnary::Type::RSQRT:
178       return ops::ElementwiseUnaryType::kRSqrt;
179     case ir::operation::ElementwiseUnary::Type::SIN:
180       return ops::ElementwiseUnaryType::kSin;
181     case ir::operation::ElementwiseUnary::Type::SQRT:
182       return ops::ElementwiseUnaryType::kSqrt;
183     case ir::operation::ElementwiseUnary::Type::SQUARE:
184       return ops::ElementwiseUnaryType::kSquare;
185     case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
186       return ops::ElementwiseUnaryType::kZerosLike;
187     default:
188       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
189   }
190 }
191
192 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
193 {
194   switch (type_ir)
195   {
196     case ir::operation::Pool2D::PoolType::AVG:
197       return ops::PoolType::kAvg;
198     case ir::operation::Pool2D::PoolType::MAX:
199       return ops::PoolType::kMax;
200     default:
201       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
202   }
203 }
204
205 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
206 {
207   switch (reduce_type_ir)
208   {
209     case ir::operation::Reduce::ReduceType::ALL:
210       return ops::ReduceType::kAll;
211     case ir::operation::Reduce::ReduceType::ANY:
212       return ops::ReduceType::kAny;
213     case ir::operation::Reduce::ReduceType::MAX:
214       return ops::ReduceType::kMax;
215     case ir::operation::Reduce::ReduceType::MIN:
216       return ops::ReduceType::kMin;
217     case ir::operation::Reduce::ReduceType::PROD:
218       return ops::ReduceType::kProd;
219     case ir::operation::Reduce::ReduceType::SUM:
220       return ops::ReduceType::kSum;
221     default:
222       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
223   }
224 }
225 } // namespace
226
227 KernelGenerator::KernelGenerator(
228   const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
229   const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
230   const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
231   const std::shared_ptr<ExternalContext> &external_context)
232   : basic::KernelGeneratorBase{graph},
233     _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
234     _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
235     _external_context(external_context)
236 {
237   // DO NOTHING
238 }
239
240 std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
241 {
242   auto ret = std::make_unique<exec::FunctionSequence>();
243
244   assert(_tensor_builder->dynamicTensorManager());
245   assert(_tensor_reg);
246
247   // Prepare to handle dynamic tensors later
248   auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
249   {
250     dyn_ctx->op = &_operations_ctx.at(ind);
251     dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
252   }
253   ret->dynamic_tensor_ctx(dyn_ctx);
254
255   auto &op = _graph.operations().at(ind);
256   op.accept(*this);
257   assert(_return_fn); // _return_fn must have been generated
258   ret->append(std::move(_return_fn));
259
260   for (auto &&ind : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
261   {
262     auto portable_tensor = _tensor_reg->getPortableTensor(ind);
263     if (portable_tensor)
264     {
265       assert(portable_tensor->layout() == ir::Layout::NHWC);
266     }
267
268     auto tensor = _tensor_reg->getNativeTensor(ind);
269     if (tensor)
270     {
271       tensor->increase_ref();
272     }
273   }
274   return ret;
275 }
276
277 void KernelGenerator::visit(const ir::operation::AddN &node)
278 {
279   const auto output_index{node.getOutputs().at(0)};
280
281   std::vector<const IPortableTensor *> input_tensors;
282   for (const auto &input_idx : node.getInputs())
283     input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
284
285   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
286
287   auto fn = std::make_unique<ops::AddNLayer>();
288
289   fn->configure(std::move(input_tensors), output_tensor);
290
291   _return_fn = std::move(fn);
292 }
293
294 void KernelGenerator::visit(const ir::operation::Conv2D &node)
295 {
296   using ir::operation::Conv2D;
297
298   const auto ofm_index{node.getOutputs().at(0)};
299   const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
300   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
301   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
302
303   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
304   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
305   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
306   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
307
308   const auto stride = node.param().stride;
309   const auto activation = node.param().activation;
310   const auto param_padding = node.param().padding;
311   const auto dilation = node.param().dilation;
312   auto fn = std::make_unique<ops::ConvolutionLayer>();
313
314   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
315   {
316     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
317                   param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
318                   stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
319                   activation, ofm_tensor);
320
321     _return_fn = std::move(fn);
322     return;
323   }
324   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
325   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
326   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
327   const auto &ker_shape = _ctx.at(ker_index).shape();
328   const auto ker_height = ker_shape.dim(1);
329   const auto ker_width = ker_shape.dim(2);
330
331   const auto padding =
332     ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
333                          dilation.width_factor, dilation.height_factor);
334
335   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
336                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
337                 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
338
339   _return_fn = std::move(fn);
340 }
341
342 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
343 {
344   using ir::operation::DepthwiseConv2D;
345
346   const auto ofm_index{node.getOutputs().at(0)};
347   const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
348   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
349   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
350
351   const auto stride = node.param().stride;
352   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
353   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
354   // Kernel format is [1, kernel_height, kernel_width, depth_out].
355   const auto &ker_shape = _ctx.at(ker_index).shape();
356   const auto ker_height = ker_shape.dim(1);
357   const auto ker_width = ker_shape.dim(2);
358   const auto dilation_width = node.param().dilation.width_factor;
359   const auto dilation_height = node.param().dilation.height_factor;
360   const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
361                                             ker_width, ker_height, dilation_width, dilation_height);
362   const auto multiplier = node.param().multiplier;
363   const auto activation = node.param().activation;
364
365   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
366   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
367   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
368   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
369
370   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
371
372   fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
373                 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
374                 dilation_height, activation, ofm_tensor, _external_context);
375
376   _return_fn = std::move(fn);
377 }
378
379 void KernelGenerator::visit(const ir::operation::Concat &node)
380 {
381   const auto ofm_index{node.getOutputs().at(0)};
382
383   const auto rank = _ctx.at(ofm_index).shape().rank();
384   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
385
386   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
387
388   std::vector<const IPortableTensor *> input_tensors;
389   for (const auto &ifm_idx : node.getInputs())
390     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
391
392   auto fn = std::make_unique<ops::ConcatLayer>();
393
394   fn->configure(input_tensors, axis, output_tensor);
395
396   _return_fn = std::move(fn);
397 }
398
399 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
400 {
401   const auto output_index{node.getOutputs().at(0)};
402   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
403   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
404
405   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
406   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
407   auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
408
409   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
410
411   IPortableTensor *crops_alloc = nullptr;
412   const auto NNApiInputs = 2;
413
414   if (node.getInputs().size() != NNApiInputs)
415   {
416     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
417     crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
418   }
419
420   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
421
422   _return_fn = std::move(fn);
423 }
424
425 void KernelGenerator::visit(const ir::operation::Fill &node)
426 {
427   const auto output_index{node.getOutputs().at(0)};
428   // SHAPE input is used for shape inference
429   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
430
431   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
432   auto value_tensor = _tensor_reg->getPortableTensor(value_index);
433
434   auto fn = std::make_unique<ops::FillLayer>();
435
436   fn->configure(value_tensor, output_tensor);
437
438   _return_fn = std::move(fn);
439 }
440
441 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
442 {
443   using ir::operation::FullyConnected;
444
445   const auto output_index{node.getOutputs().at(0)};
446   const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
447   const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
448   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
449   const auto activation = node.param().activation;
450   const auto weights_format = node.param().weights_format;
451
452   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
453   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
454   auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
455   auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
456
457   auto fn = std::make_unique<ops::FullyConnectedLayer>();
458
459   fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
460                 _external_context);
461
462   _return_fn = std::move(fn);
463 }
464
465 void KernelGenerator::visit(const ir::operation::Reshape &node)
466 {
467   const auto output_index{node.getOutputs().at(0)};
468   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
469
470   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
471   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
472
473   // optional 2nd input
474   IPortableTensor *shape_tensor = nullptr;
475
476   if (node.getInputs().size() == 2)
477   {
478     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
479     shape_tensor = _tensor_reg->getPortableTensor(shape_index);
480   }
481
482   auto fn = std::make_unique<ops::ReshapeLayer>();
483
484   fn->configure(input_tensor, shape_tensor, output_tensor);
485   _return_fn = std::move(fn);
486 }
487
488 void KernelGenerator::visit(const ir::operation::Squeeze &node)
489 {
490   const auto output_index{node.getOutputs().at(0)};
491   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
492
493   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
494   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
495
496   // Squeeze can share same kernel with reshape
497   auto fn = std::make_unique<ops::ReshapeLayer>();
498
499   fn->configure(input_tensor, nullptr, output_tensor);
500
501   _return_fn = std::move(fn);
502 }
503
504 void KernelGenerator::visit(const ir::operation::Softmax &node)
505 {
506   const auto output_index{node.getOutputs().at(0)};
507   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
508
509   const auto beta = node.param().beta;
510
511   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
512   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
513
514   auto fn = std::make_unique<ops::SoftMaxLayer>();
515
516   fn->configure(input_tensor, beta, output_tensor);
517
518   _return_fn = std::move(fn);
519 }
520
521 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
522 {
523   const auto ofm_index{node.getOutputs().at(0)};
524   const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
525   const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
526
527   const auto activation = node.param().activation;
528
529   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
530   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
531   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
532
533   auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
534
535   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
536                 convertArithmeticType(node.param().arithmetic_type));
537
538   _return_fn = std::move(fn);
539 }
540
541 void KernelGenerator::visit(const ir::operation::Comparison &node)
542 {
543   const auto ofm_index{node.getOutputs().at(0)};
544   const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
545   const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
546
547   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
548   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
549   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
550
551   auto comparison_type = node.param().comparison_type;
552
553   auto fn = std::make_unique<ops::CompareLayer>();
554
555   fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
556
557   _return_fn = std::move(fn);
558 }
559
560 void KernelGenerator::visit(const ir::operation::Gather &node)
561 {
562   const auto output_index{node.getOutputs().at(0)};
563   const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
564   const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
565
566   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
567   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
568   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
569
570   const auto backend_layout = output_tensor->layout();
571   UNUSED_RELEASE(backend_layout);
572
573   // NOTE The frontend layout and backend layout must be the same for this operation.
574   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
575   //      is not not efficient even if it works well. If so, it would be better to set the
576   //      layout of these backend tensors to the same layout.
577   //      There is also one thing we have to think about. This operation depends on the layout of
578   //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
579   //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
580   //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
581   assert(backend_layout == input_tensor->layout());
582   assert(backend_layout == indices_tensor->layout());
583   const auto &input_shape = _ctx.at(input_index).shape();
584   UNUSED_RELEASE(input_shape);
585   assert(input_shape.rank() < 4 || _current_layout == backend_layout);
586
587   const auto axis_raw = node.param().axis;
588   const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
589
590   auto fn = std::make_unique<ops::GatherLayer>();
591
592   fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
593
594   _return_fn = std::move(fn);
595 }
596
597 void KernelGenerator::visit(const ir::operation::OneHot &node)
598 {
599   const auto output_index{node.getOutputs().at(0)};
600   const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
601   const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
602   const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
603   const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
604
605   const auto axis = node.param().axis;
606
607   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
608   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
609   auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
610   auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
611   auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
612
613   assert(indices_tensor->data_type() == OperandType::INT32);
614   assert(axis <= static_cast<int>(indices_tensor->getShape().rank()));
615
616   auto fn = std::make_unique<ops::OneHotLayer>();
617
618   fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
619
620   _return_fn = std::move(fn);
621 }
622
623 void KernelGenerator::visit(const ir::operation::Einsum &node)
624 {
625   const auto ofm_index{node.getOutputs().at(0)};
626
627   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
628   std::vector<const IPortableTensor *> input_tensors;
629   for (const auto &ifm_idx : node.getInputs())
630     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
631
632   const auto equation = node.param().equation;
633
634   auto fn = std::make_unique<ops::EinsumLayer>();
635
636   fn->configure(input_tensors, equation, output_tensor);
637
638   _return_fn = std::move(fn);
639 }
640
641 void KernelGenerator::visit(const ir::operation::Custom &node)
642 {
643   auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
644                           std::vector<custom::TypeInfo> &types,
645                           std::vector<IPortableTensor *> &tensors) {
646     for (const auto &idx : opSeq)
647     {
648       const auto &operand = _ctx.at(idx);
649       // TODO make sure using `_current_layout` is correct for custom operations
650       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
651       auto in_tensor = _tensor_reg->getPortableTensor(idx);
652       tensors.emplace_back(in_tensor);
653     }
654   };
655
656   backend::custom::CustomKernelConfigParams params{};
657
658   fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
659   fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
660
661   params.userdata = node.userdata().data;
662   params.userdata_size = node.userdata().size;
663
664   auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
665
666   _return_fn = std::move(fn);
667 }
668
669 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
670 {
671   const auto output_index{node.getOutputs().at(0)};
672   const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
673
674   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
675   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
676
677   auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
678
679   fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
680                 convertElementwiseActivationType(node.param().op_type));
681
682   _return_fn = std::move(fn);
683 }
684
685 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
686 {
687   const auto output_index{node.getOutputs().at(0)};
688   const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
689   const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
690
691   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
692   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
693   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
694
695   auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
696
697   fn->configure(lhs_tensor, rhs_tensor, output_tensor,
698                 convertElementwiseBinaryType(node.param().op_type));
699
700   _return_fn = std::move(fn);
701 }
702
703 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
704 {
705   const auto output_index{node.getOutputs().at(0)};
706   const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
707
708   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
709   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
710
711   if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
712   {
713     auto fn = std::make_unique<ops::QuantizeLayer>();
714     fn->configure(input_tensor, output_tensor);
715     _return_fn = std::move(fn);
716   }
717   else
718   {
719     auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
720     fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
721     _return_fn = std::move(fn);
722   }
723 }
724
725 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
726 {
727   const auto output_index{node.getOutputs().at(0)};
728   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
729   // AXIS input is used for output shape inference
730
731   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
732   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
733
734   auto fn = std::make_unique<ops::ExpandDimsLayer>();
735
736   fn->configure(input_tensor, output_tensor);
737
738   _return_fn = std::move(fn);
739 }
740
741 void KernelGenerator::visit(const ir::operation::Pack &node)
742 {
743   const auto ofm_index{node.getOutputs().at(0)};
744
745   const auto rank = _ctx.at(ofm_index).shape().rank();
746   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
747
748   assert(-rank <= axis && axis < rank);
749
750   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
751
752   std::vector<const IPortableTensor *> input_tensors;
753   for (const auto &ifm_idx : node.getInputs())
754     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
755
756   auto fn = std::make_unique<ops::PackLayer>();
757
758   fn->configure(input_tensors, axis, output_tensor);
759
760   _return_fn = std::move(fn);
761 }
762
763 void KernelGenerator::visit(const ir::operation::Unpack &node)
764 {
765   const auto input_index{node.getInputs().at(0)};
766
767   const auto rank = _ctx.at(input_index).shape().rank();
768   const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);
769
770   assert(rank == 0 || (-rank <= axis && axis < rank));
771
772   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
773
774   std::vector<IPortableTensor *> output_tensors;
775   for (const auto &output_idx : node.getOutputs())
776     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
777
778   auto fn = std::make_unique<ops::UnpackLayer>();
779
780   uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
781
782   fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
783
784   _return_fn = std::move(fn);
785 }
786
787 void KernelGenerator::visit(const ir::operation::Pad &node)
788 {
789   const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
790   const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
791   const auto output_index{node.getOutputs().at(0)};
792   assert(_ctx.at(pad_index).data());
793
794   auto input = _tensor_reg->getPortableTensor(input_index);
795   auto output = _tensor_reg->getPortableTensor(output_index);
796   auto pad_rank = _ctx.at(pad_index).shape().dim(0);
797   auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
798
799   auto fn = std::make_unique<ops::PadLayer>();
800
801   bool isPadV2 = node.getInputs().size() == 3 ? true : false;
802   const void *value = nullptr;
803
804   if (isPadV2)
805   {
806     const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
807     value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
808   }
809
810   fn->configure(input, output, pad_base, pad_rank, value);
811   _return_fn = std::move(fn);
812 }
813
814 void KernelGenerator::visit(const ir::operation::Transpose &node)
815 {
816   const auto output_index{node.getOutputs().at(0)};
817   const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
818   const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
819
820   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
821   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
822   auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
823
824   auto fn = std::make_unique<ops::TransposeLayer>();
825
826   fn->configure(input_tensor, perm_tensor, output_tensor);
827
828   _return_fn = std::move(fn);
829 }
830
831 void KernelGenerator::visit(const ir::operation::Reduce &node)
832 {
833   const auto output_index{node.getOutputs().at(0)};
834   const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
835   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
836
837   const auto keep_dims = node.param().keep_dims;
838   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
839   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
840   auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
841
842   if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
843   {
844     auto fn = std::make_unique<ops::MeanLayer>();
845
846     fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
847
848     _return_fn = std::move(fn);
849   }
850   else
851   {
852     auto fn = std::make_unique<ops::ReduceLayer>();
853
854     const auto reduce_type = convertReduceType(node.param().reduce_type);
855     fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
856
857     _return_fn = std::move(fn);
858   }
859 }
860
861 void KernelGenerator::visit(const ir::operation::Select &node)
862 {
863   const auto output_index{node.getOutputs().at(0)};
864   const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
865   const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
866   const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
867
868   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
869   auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
870   auto true_tensor = _tensor_reg->getPortableTensor(true_index);
871   auto false_tensor = _tensor_reg->getPortableTensor(false_index);
872
873   auto fn = std::make_unique<ops::SelectLayer>();
874
875   fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
876
877   _return_fn = std::move(fn);
878 }
879
880 void KernelGenerator::visit(const ir::operation::Slice &node)
881 {
882   const auto output_index{node.getOutputs().at(0)};
883   const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
884   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
885   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
886
887   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
888   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
889   auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
890   auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
891
892   auto fn = std::make_unique<ops::SliceLayer>();
893
894   fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
895
896   _return_fn = std::move(fn);
897 }
898
899 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
900 {
901   const auto output_index{node.getOutputs().at(0)};
902   const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
903   const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
904   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
905   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
906
907   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
908   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
909   auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
910   auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
911   auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
912
913   auto begin_mask = node.param().begin_mask;
914   auto end_mask = node.param().end_mask;
915   auto shrink_axis_mask = node.param().shrink_axis_mask;
916
917   auto fn = std::make_unique<ops::StridedSliceLayer>();
918
919   fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
920                 end_mask, shrink_axis_mask);
921
922   _return_fn = std::move(fn);
923 }
924
925 void KernelGenerator::visit(const ir::operation::Split &node)
926 {
927   const auto num_splits = node.param().num_splits;
928   assert(num_splits == static_cast<int>(node.getOutputs().size()));
929
930   const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
931   const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
932
933   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
934   auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
935
936   std::vector<IPortableTensor *> out_tensors;
937   for (const auto &output_idx : node.getOutputs())
938     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
939
940   auto fn = std::make_unique<ops::SplitLayer>();
941
942   fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
943
944   _return_fn = std::move(fn);
945 }
946
947 void KernelGenerator::visit(const ir::operation::Shape &node)
948 {
949   const auto ofm_index{node.getOutputs().at(0)};
950   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
951
952   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
953   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
954
955   auto fn = std::make_unique<ops::ShapeLayer>();
956
957   fn->configure(ifm_tensor, ofm_tensor);
958
959   _return_fn = std::move(fn);
960 }
961
962 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
963 {
964   const auto output_index{node.getOutputs().at(0)};
965   const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
966
967   auto align_corners = node.param().align_corners;
968   auto half_pixel_centers = node.param().half_pixel_centers;
969
970   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
971   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
972
973   auto fn = std::make_unique<ops::ResizeBilinearLayer>();
974
975   if (node.getInputs().size() == 1)
976   {
977     fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
978                   align_corners, half_pixel_centers);
979   }
980   else
981   {
982     assert(node.getInputs().size() == 2);
983     const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
984     auto size_tensor = _tensor_reg->getPortableTensor(size_index);
985     if (size_tensor->is_constant())
986     {
987       auto size_vec = _ctx.at(size_index).asVector<int32_t>();
988       const auto height_out = size_vec[0];
989       const auto width_out = size_vec[1];
990       fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
991                     half_pixel_centers);
992     }
993     else
994     {
995       fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
996     }
997   }
998
999   _return_fn = std::move(fn);
1000 }
1001
1002 void KernelGenerator::visit(const ir::operation::Reverse &node)
1003 {
1004   const auto output_index{node.getOutputs().at(0)};
1005   const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
1006   const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
1007
1008   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1009   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1010   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1011
1012   auto fn = std::make_unique<ops::ReverseLayer>();
1013
1014   fn->configure(input_tensor, axis_tensor, output_tensor);
1015
1016   _return_fn = std::move(fn);
1017 }
1018
1019 void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
1020 {
1021   const auto output_index{node.getOutputs().at(0)};
1022   const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
1023   const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};
1024
1025   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1026   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1027   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1028
1029   auto fn = std::make_unique<ops::ArgMinMaxLayer>();
1030
1031   fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);
1032
1033   _return_fn = std::move(fn);
1034 }
1035
1036 void KernelGenerator::visit(const ir::operation::Pool2D &node)
1037 {
1038   const auto ofm_index{node.getOutputs().at(0)};
1039   const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
1040
1041   const auto kh = node.param().kh;
1042   const auto kw = node.param().kw;
1043   const auto stride = node.param().stride;
1044   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
1045   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
1046   const auto padding =
1047     ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
1048   const auto activation = node.param().activation;
1049
1050   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1051   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1052
1053   auto fn = std::make_unique<ops::PoolLayer>();
1054
1055   fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1056                 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1057                 convertPoolType(node.param().op_type));
1058
1059   _return_fn = std::move(fn);
1060 }
1061
1062 void KernelGenerator::visit(const ir::operation::Pow &node)
1063 {
1064   const auto output_index{node.getOutputs().at(0)};
1065   const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1066   const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1067
1068   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1069   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1070   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1071
1072   auto fn = std::make_unique<ops::PowLayer>();
1073
1074   fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1075
1076   _return_fn = std::move(fn);
1077 }
1078
1079 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1080 {
1081   const auto output_index{node.getOutputs().at(0)};
1082   const auto input_index{node.getInputs().at(0)};
1083
1084   auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1085   auto input_alloc = _tensor_reg->getPortableTensor(input_index);
1086
1087   auto fn = std::make_unique<ops::L2NormLayer>();
1088
1089   fn->configure(input_alloc, output_alloc);
1090
1091   _return_fn = std::move(fn);
1092 }
1093
1094 void KernelGenerator::visit(const ir::operation::Range &node)
1095 {
1096   const auto output_index{node.getOutputs().at(0)};
1097   const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1098   const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1099   const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1100
1101   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1102   auto start_tensor = _tensor_reg->getPortableTensor(start_index);
1103   auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
1104   auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
1105
1106   auto fn = std::make_unique<ops::RangeLayer>();
1107
1108   fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1109   _return_fn = std::move(fn);
1110 }
1111
1112 void KernelGenerator::visit(const ir::operation::Rank &node)
1113 {
1114   const auto ofm_index{node.getOutputs().at(0)};
1115   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1116
1117   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1118   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1119
1120   auto fn = std::make_unique<ops::RankLayer>();
1121
1122   fn->configure(ifm_tensor, ofm_tensor);
1123
1124   _return_fn = std::move(fn);
1125 }
1126
1127 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1128 {
1129   const auto ofm_index{node.getOutputs().at(0)};
1130   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1131   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1132
1133   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1134   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1135   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1136
1137   auto fn = std::make_unique<ops::SqDiffLayer>();
1138
1139   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1140   _return_fn = std::move(fn);
1141 }
1142
1143 void KernelGenerator::visit(const ir::operation::Tile &node)
1144 {
1145   const auto output_index{node.getOutputs().at(0)};
1146   const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1147   const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1148
1149   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1150   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1151   auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
1152
1153   auto fn = std::make_unique<ops::TileLayer>();
1154
1155   fn->configure(input_tensor, multiples_tensor, output_tensor);
1156   _return_fn = std::move(fn);
1157 }
1158
1159 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1160 {
1161   const auto output_index{node.getOutputs().at(0)};
1162   const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1163   const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1164   const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1165
1166   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1167   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1168   auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
1169   auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
1170
1171   auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1172
1173   fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1174   _return_fn = std::move(fn);
1175 }
1176
1177 void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
1178 {
1179   using NMS = ir::operation::DetectionPostProcess;
1180
1181   ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
1182   parameters.scales.y = node.param().scale.y_scale;
1183   parameters.scales.x = node.param().scale.x_scale;
1184   parameters.scales.w = node.param().scale.w_scale;
1185   parameters.scales.h = node.param().scale.h_scale;
1186
1187   parameters.iou_threshold = node.param().iou_threshold;
1188   parameters.score_threshold = node.param().score_threshold;
1189   parameters.max_boxes_per_class = node.param().max_boxes_per_class;
1190   parameters.max_detections = node.param().max_detections;
1191   parameters.num_classes = node.param().num_classes;
1192   parameters.center_box_format = node.param().center_size_boxes;
1193   parameters.max_classes_per_detection = node.param().max_classes_per_detection;
1194
1195   auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
1196   auto scores_index = node.getInputs().at(NMS::Input::SCORES);
1197   auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);
1198
1199   auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
1200   auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
1201   auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
1202   auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);
1203
1204   parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
1205   parameters.scrores_descr = _ctx.at(scores_index).shape().dims();
1206
1207   parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
1208   parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
1209   parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);
1210
1211   parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
1212   parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
1213   parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
1214   parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);
1215
1216   auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
1217   fn->configure(std::move(parameters));
1218
1219   _return_fn = std::move(fn);
1220 }
1221
1222 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1223 {
1224   const auto output_index{node.getOutputs().at(0)};
1225   const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1226   const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1227
1228   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1229   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1230   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1231
1232   const auto adj_x = node.param().adj_x;
1233   const auto adj_y = node.param().adj_y;
1234
1235   auto fn = std::make_unique<ops::BatchMatMulLayer>();
1236
1237   fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1238   _return_fn = std::move(fn);
1239 }
1240
1241 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1242 {
1243   const auto output_index{node.getOutputs().at(0)};
1244   const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1245   const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1246
1247   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1248   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1249   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1250
1251   auto fn = std::make_unique<ops::BroadcastToLayer>();
1252
1253   fn->configure(input_tensor, shape_tensor, output_tensor);
1254
1255   _return_fn = std::move(fn);
1256 }
1257
1258 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1259 {
1260   const auto ofm_index{node.getOutputs().at(0)};
1261
1262   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
1263   std::vector<const IPortableTensor *> input_tensors;
1264   for (const auto &ifm_idx : node.getInputs())
1265     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
1266
1267   const auto epsilon = node.param().epsilon;
1268   const auto is_training = node.param().is_training;
1269   const auto data_format = node.param().data_format;
1270
1271   auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1272
1273   fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1274
1275   _return_fn = std::move(fn);
1276 }
1277
1278 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1279 {
1280   const auto output_index{node.getOutputs().at(0)};
1281   const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1282
1283   const auto beta = node.param().beta;
1284   const auto axis = node.param().axis;
1285
1286   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1287   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1288
1289   auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1290
1291   fn->configure(input_tensor, beta, axis, output_tensor);
1292
1293   _return_fn = std::move(fn);
1294 }
1295
1296 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1297 {
1298   const auto output_index{node.getOutputs().at(0)};
1299   const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1300   const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1301   const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1302
1303   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1304   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1305   auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
1306   auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
1307
1308   auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1309
1310   fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1311
1312   _return_fn = std::move(fn);
1313 }
1314
1315 void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1316 {
1317   const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1318   const auto output_index{node.getOutputs().at(0)};
1319   auto block_size = node.param().block_size;
1320
1321   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1322   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1323
1324   auto fn = std::make_unique<ops::DepthToSpaceLayer>();
1325
1326   fn->configure(input_tensor, block_size, output_tensor);
1327   _return_fn = std::move(fn);
1328 }
1329
1330 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1331 {
1332   const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1333   const auto output_index{node.getOutputs().at(0)};
1334   auto block_size = node.param().block_size;
1335
1336   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1337   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1338
1339   auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1340
1341   fn->configure(input_tensor, block_size, output_tensor);
1342   _return_fn = std::move(fn);
1343 }
1344
1345 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1346 {
1347   const auto output_index{node.getOutputs().at(0)};
1348   const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1349   const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1350
1351   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1352   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1353   auto seed_tensor = _tensor_reg->getPortableTensor(seed_index);
1354
1355   auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1356
1357   fn->configure(shape_tensor, seed_tensor, output_tensor);
1358   _return_fn = std::move(fn);
1359 }
1360
1361 void KernelGenerator::visit(const ir::operation::SplitV &node)
1362 {
1363   const auto num_splits = node.param().num_splits;
1364   assert(num_splits == static_cast<int>(node.getOutputs().size()));
1365
1366   const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1367   const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1368   const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1369
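  // size_splits and split_dim are graph inputs rather than node parameters, so they are looked up
  // as tensors here.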
1370   auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
1371   auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
1372   auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
1373
1374   std::vector<IPortableTensor *> out_tensors;
1375   for (const auto &output_idx : node.getOutputs())
1376     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
1377
1378   auto fn = std::make_unique<ops::SplitVLayer>();
1379
1380   fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1381
1382   _return_fn = std::move(fn);
1383 }
1384
1385 void KernelGenerator::visit(const ir::operation::LSTM &node)
1386 {
1387   const auto scratch_buffer_index{
1388     node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
1389   const auto output_state_out_index{
1390     node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
1391   const auto cell_state_out_index{
1392     node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
1393   const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
1394
1395   const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
1396   const auto input_to_input_weights_index{
1397     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
1398   const auto input_to_forget_weights_index{
1399     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
1400   const auto input_to_cell_weights_index{
1401     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
1402   const auto input_to_output_weights_index{
1403     node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
1404   const auto recurrent_to_input_weights_index{
1405     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
1406   const auto recurrent_to_forget_weights_index{
1407     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
1408   const auto recurrent_to_cell_weights_index{
1409     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
1410   const auto recurrent_to_output_weights_index{
1411     node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
1412   const auto cell_to_input_weights_index{
1413     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
1414   const auto cell_to_forget_weights_index{
1415     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
1416   const auto cell_to_output_weights_index{
1417     node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
1418   const auto input_gate_bias_index{
1419     node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
1420   const auto forget_gate_bias_index{
1421     node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
1422   const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
1423   const auto output_gate_bias_index{
1424     node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
1425   const auto projection_weights_index{
1426     node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
1427   const auto projection_bias_index{
1428     node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
1429   const auto output_state_in_index{
1430     node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
1431   const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
1432   const auto time_major = node.param().time_major;
1433
1434   // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
1435   // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
1436   // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
1437   // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
1438   bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
1439                                     (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
1440                                      _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
1441   bool has_recurrent_to_input_weights =
1442     _ctx.exist(recurrent_to_input_weights_index) &&
1443     (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
1444      _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
1445
1446   // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole mode.
1447   // But the cell_to_input_weights does not exist in CIFG mode, even with peephole connections.
1448   // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
1449   // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
1450   bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
1451                                     _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
1452   bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
1453                                     _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
1454
1455   bool has_input_gate_bias =
1456     _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
1457
1458   bool has_projection_weights = _ctx.exist(projection_weights_index) &&
1459                                 (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
1460                                  _ctx.at(projection_weights_index).shape().dim(1) != 0);
1461   bool has_projection_bias =
1462     _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
1463
1464   auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
1465                                  ? _tensor_reg->getPortableTensor(scratch_buffer_index)
1466                                  : nullptr; // optional
1467   auto output_state_out_tensor = _ctx.exist(output_state_out_index)
1468                                    ? _tensor_reg->getPortableTensor(output_state_out_index)
1469                                    : nullptr; // optional
1470   auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
1471                                  ? _tensor_reg->getPortableTensor(cell_state_out_index)
1472                                  : nullptr; // optional
1473   auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1474
1475   auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1476
1477   auto input_to_input_weights_tensor =
1478     has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
1479                                : nullptr; // optional
1480   auto input_to_forget_weights_tensor =
1481     _tensor_reg->getPortableTensor(input_to_forget_weights_index);
1482   auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
1483   auto input_to_output_weights_tensor =
1484     _tensor_reg->getPortableTensor(input_to_output_weights_index);
1485   auto recurrent_to_input_weights_tensor =
1486     has_recurrent_to_input_weights
1487       ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
1488       : nullptr; // optional
1489   auto recurrent_to_forget_weights_tensor =
1490     _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
1491   auto recurrent_to_cell_weights_tensor =
1492     _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
1493   auto recurrent_to_output_weights_tensor =
1494     _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);
1495
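  // cell_to_input_weights is only meaningful for a peephole, non-CIFG LSTM (see NOTEs above).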
1496   auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
1497   auto cell_to_forget_weights_tensor =
1498     has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
1499                                : nullptr; // optional
1500   auto cell_to_output_weights_tensor =
1501     has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
1502                                : nullptr; // optional
1503
1504   auto input_gate_bias_tensor =
1505     has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
1506   auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
1507   auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
1508   auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
1509   auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
1510   auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);
1511
1512   auto projection_weights_tensor = has_projection_weights
1513                                      ? _tensor_reg->getPortableTensor(projection_weights_index)
1514                                      : nullptr; // optional
1515   auto projection_bias_tensor = has_projection_bias
1516                                   ? _tensor_reg->getPortableTensor(projection_bias_index)
1517                                   : nullptr; // optional
1518
1519   IPortableTensor *input_layer_norm_weights_tensor = nullptr;
1520   IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
1521   IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
1522   IPortableTensor *output_layer_norm_weights_tensor = nullptr;
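  // An LSTM node with 24 inputs also carries the four optional layer-normalization weight tensors.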
1523   if (node.getInputs().size() == 24)
1524   {
1525     const auto input_layer_norm_weights_index{
1526       node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
1527     const auto forget_layer_norm_weights_index{
1528       node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
1529     const auto cell_layer_norm_weights_index{
1530       node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
1531     const auto output_layer_norm_weights_index{
1532       node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};
1533
1534     input_layer_norm_weights_tensor =
1535       _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
1536     forget_layer_norm_weights_tensor =
1537       _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
1538     cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
1539     output_layer_norm_weights_tensor =
1540       _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
1541   }
1542
1543   auto fn = std::make_unique<ops::LSTMLayer>();
1544
1545   fn->configure(
1546     input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
1547     input_to_cell_weights_tensor, input_to_output_weights_tensor, recurrent_to_input_weights_tensor,
1548     recurrent_to_forget_weights_tensor, recurrent_to_cell_weights_tensor,
1549     recurrent_to_output_weights_tensor, cell_to_input_weights_tensor, cell_to_forget_weights_tensor,
1550     cell_to_output_weights_tensor, input_layer_norm_weights_tensor,
1551     forget_layer_norm_weights_tensor, cell_layer_norm_weights_tensor,
1552     output_layer_norm_weights_tensor,
1553     /*aux_input=*/nullptr,
1554     /*aux_input_to_input_weights=*/nullptr,
1555     /*aux_input_to_forget_weights=*/nullptr,
1556     /*aux_input_to_cell_weights=*/nullptr,
1557     /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
1558     cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
1559     projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
1560     /*forward_sequence=*/true, time_major,
1561     /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
1562     output_tensor,
1563     !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
1564     !_ctx.at(cell_state_in_index).info().isVariable());
1565
1566   _return_fn = std::move(fn);
1567 }
1568
1569 } // namespace cpu
1570 } // namespace backend
1571 } // namespace onert