/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"

#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
#include "ops/ElementwiseBinaryLayer.h"
#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
#include "ops/StatelessRandomUniformLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>

#include <util/Utils.h>
#include <util/logging.h>
#include <exec/DynamicShapeInferer.h>

namespace onert
{
namespace backend
{
namespace cpu
{

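// The helpers in the anonymous namespace below map IR-level enum values onto
// the enum values used by the CPU kernels in `ops/`. Each converter covers
// only the values this backend currently supports and throws
// std::runtime_error for anything else, so an unsupported model fails at
// kernel-generation time rather than mid-inference.
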
namespace
{
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseActivation::Type::ELU:
      return ops::ElementwiseActivationType::kElu;
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return ops::ElementwiseActivationType::kLogistic;
    case ir::operation::ElementwiseActivation::Type::RELU:
      return ops::ElementwiseActivationType::kReLU;
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ops::ElementwiseActivationType::kTanh;
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ops::ElementwiseActivationType::kLeakyReLU;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
      return ops::ElementwiseBinaryType::kLogicalAnd;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
      return ops::ElementwiseBinaryType::kLogicalOr;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
      return ops::ElementwiseBinaryType::kMax;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
      return ops::ElementwiseBinaryType::kMin;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseUnary::Type::ABS:
      return ops::ElementwiseUnaryType::kAbs;
    case ir::operation::ElementwiseUnary::Type::CAST:
      return ops::ElementwiseUnaryType::kCast;
    case ir::operation::ElementwiseUnary::Type::COS:
      return ops::ElementwiseUnaryType::kCos;
    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
      return ops::ElementwiseUnaryType::kDequantize;
    case ir::operation::ElementwiseUnary::Type::ERF:
      return ops::ElementwiseUnaryType::kErf;
    case ir::operation::ElementwiseUnary::Type::EXP:
      return ops::ElementwiseUnaryType::kExp;
    case ir::operation::ElementwiseUnary::Type::FLOOR:
      return ops::ElementwiseUnaryType::kFloor;
    case ir::operation::ElementwiseUnary::Type::LOG:
      return ops::ElementwiseUnaryType::kLog;
    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
      return ops::ElementwiseUnaryType::kLogicalNot;
    case ir::operation::ElementwiseUnary::Type::NEG:
      return ops::ElementwiseUnaryType::kNeg;
    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
      return ops::ElementwiseUnaryType::kQuantize;
    case ir::operation::ElementwiseUnary::Type::ROUND:
      return ops::ElementwiseUnaryType::kRound;
    case ir::operation::ElementwiseUnary::Type::RSQRT:
      return ops::ElementwiseUnaryType::kRSqrt;
    case ir::operation::ElementwiseUnary::Type::SIN:
      return ops::ElementwiseUnaryType::kSin;
    case ir::operation::ElementwiseUnary::Type::SQRT:
      return ops::ElementwiseUnaryType::kSqrt;
    case ir::operation::ElementwiseUnary::Type::SQUARE:
      return ops::ElementwiseUnaryType::kSquare;
    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
      return ops::ElementwiseUnaryType::kZerosLike;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::ALL:
      return ops::ReduceType::kAll;
    case ir::operation::Reduce::ReduceType::ANY:
      return ops::ReduceType::kAny;
    case ir::operation::Reduce::ReduceType::MAX:
      return ops::ReduceType::kMax;
    case ir::operation::Reduce::ReduceType::MIN:
      return ops::ReduceType::kMin;
    case ir::operation::Reduce::ReduceType::PROD:
      return ops::ReduceType::kProd;
    case ir::operation::Reduce::ReduceType::SUM:
      return ops::ReduceType::kSum;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace

KernelGenerator::KernelGenerator(
    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
    const std::shared_ptr<TensorBuilder> &tensor_builder,
    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
    const std::shared_ptr<ExternalContext> &external_context)
    : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
      _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
  // DO NOTHING
}

void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
  assert(!_return_fn_seq);
  assert(_tensor_builder->dynamicTensorManager());

  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);

  _return_fn_seq = std::make_unique<exec::FunctionSequence>();

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op_seq = &op_seq;
    dyn_ctx->operations = &_operations_ctx;
    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();

    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
  }

  _current_layout = op_seq.getLayout();
  for (const auto &operation_idx : op_seq.operations())
  {
    const auto &node = _operations_ctx.at(operation_idx);
    node.accept(*this);
    _return_fn_seq->append(releaseFunction());

    for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
    {
      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
      if (portable_tensor)
      {
        assert(portable_tensor->layout() == ir::Layout::NHWC);
      }

      auto tensor = _tensor_reg->getNativeTensor(ind);
      if (tensor)
      {
        tensor->increase_ref();
      }
    }
  }
}

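// In short, for every operation in the sequence the flow above is:
//   1. node.accept(*this)          -> the matching visit() below fills _return_fn
//   2. releaseFunction()           -> hands _return_fn over as a runnable function
//   3. _return_fn_seq->append(...) -> queues it for execution in model order
// and every native tensor touched by the node gets its reference count bumped,
// so the underlying buffer stays alive for all kernels that use it.
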
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto param_padding = node.param().padding;
  const auto dilation = node.param().dilation;
  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    // Shapes are unknown at this point, so padding cannot be precomputed;
    // pass the raw padding parameters and let the kernel resolve them at run time.
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                           dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);

  _return_fn = std::move(fn);
}

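// Worked example for the static-shape path above (a sketch; the exact split of
// odd padding between the two sides is defined by ir::calculatePadding): with
// SAME padding, input height 224, stride 2, kernel 3, dilation 1, the output
// height is ceil(224 / 2) = 112 and the total vertical padding is
// max((112 - 1) * 2 + 3 - 224, 0) = 1, typically split as top = 0, bottom = 1.
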
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}

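// Note: the 2-input form matches the NNAPI BATCH_TO_SPACE_ND signature, which
// carries no crops operand; a null crops tensor therefore signals the kernel
// to treat cropping as zero. The 3-input (TensorFlow-style) form passes
// explicit crops data through.
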
void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share the same kernel with Reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto backend_layout = output_tensor->layout();
  UNUSED_RELEASE(backend_layout);

  // NOTE The frontend layout and backend layout must be the same for this operation.
  //      If not the same, we have to add a stage(?) to perform permutation of the output
  //      tensor. It is not efficient even if it works well. If so, it would be better to
  //      set the layout of these backend tensors to the same layout.
  //      There is also one thing we have to think about. This operation depends on the layout of
  //      a model. For example, if a model in NHWC has this operation with output rank == 4,
  //      indices rank == 2 and axis == 2, this operation should work on the axes W and C, but
  //      the axes W and C are not sequential in NCHW. So a backend in NCHW cannot handle this
  //      case.
  assert(backend_layout == input_tensor->layout());
  assert(backend_layout == indices_tensor->layout());
  const auto &input_shape = _ctx.at(input_index).shape();
  UNUSED_RELEASE(input_shape);
  assert(input_shape.rank() < 4 || _current_layout == backend_layout);

  const auto axis_raw = node.param().axis;
  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);

  _return_fn = std::move(fn);
}

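// Example of the axis normalization above: for an input of rank 4,
// axis_raw == -1 resolves to axis_value == 3 (the last dimension), matching
// the usual negative-axis convention of TensorFlow/NNAPI gather.
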
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Einsum &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto equation = node.param().equation;

  auto fn = std::make_unique<ops::EinsumLayer>();

  fn->configure(input_tensors, equation, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      // TODO make sure using `_current_layout` is correct for custom operations
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };

  backend::custom::CustomKernelConfigParams params{};

  fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
  fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);

  params.userdata = node.userdata().data;
  params.userdata_size = node.userdata().size;

  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));

  _return_fn = std::move(fn);
}

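// Unlike the built-in visitors, there is no ops/*Layer kernel for Custom
// nodes: the backend delegates to the user-provided IKernelBuilder, which is
// expected to resolve the operation's string id to a registered kernel factory
// and construct the kernel from the type/tensor info gathered above.
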
void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();

  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
                convertElementwiseActivationType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();

  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
                convertElementwiseBinaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();

  fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
  // AXIS input is used for output shape inference

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ExpandDimsLayer>();

  fn->configure(input_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  assert(-rank <= axis && axis < rank);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::PackLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(0)};

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  assert(rank == 0 || (-rank <= axis && axis < rank));

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  std::vector<IPortableTensor *> output_tensors;
  for (auto &output_idx : node.getOutputs())
    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::UnpackLayer>();

  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  assert(_ctx.at(pad_index).data());

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto output = _tensor_reg->getPortableTensor(output_index);
  auto pad_rank = _ctx.at(pad_index).shape().dim(0);
  auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());

  auto fn = std::make_unique<ops::PadLayer>();

  const bool isPadV2 = node.getInputs().size() == 3;
  const void *value = nullptr;

  if (isPadV2)
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
  }

  fn->configure(input, output, pad_base, pad_rank, value);
  _return_fn = std::move(fn);
}

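// Note: `value` is handed over as a type-erased pointer into the constant
// operand's buffer; a null value (plain Pad) is presumably interpreted by
// PadLayer as padding with zeros, while the PadV2 form pads with the given
// constant reinterpreted according to the input's data type.
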
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}

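// Design note: when SIZE is a constant operand, its two elements are folded
// into scalar output height/width at kernel-generation time, so the kernel
// never re-reads the tensor during execution; only a genuinely dynamic size
// tensor is passed through to be read at run time.
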
void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  const auto padding =
      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
  const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
  const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);

  auto fn = std::make_unique<ops::MatrixBandPartLayer>();

  fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  const auto scratch_buffer_index{
      node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
  // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
  // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
  bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
                                    (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
  bool has_recurrent_to_input_weights =
      _ctx.exist(recurrent_to_input_weights_index) &&
      (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
       _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);

  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
  // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
  // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
  bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
                                    _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
                                    _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;

  bool has_input_gate_bias =
      _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);

  bool has_projection_weights = _ctx.exist(projection_weights_index) &&
                                (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
                                 _ctx.at(projection_weights_index).shape().dim(1) != 0);
  bool has_projection_bias =
      _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);

  auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
                                   ? _tensor_reg->getPortableTensor(scratch_buffer_index)
                                   : nullptr; // optional
  auto output_state_out_tensor = _ctx.exist(output_state_out_index)
                                     ? _tensor_reg->getPortableTensor(output_state_out_index)
                                     : nullptr; // optional
  auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
                                   ? _tensor_reg->getPortableTensor(cell_state_out_index)
                                   : nullptr; // optional
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto input_to_input_weights_tensor =
      has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
                                 : nullptr; // optional
  auto input_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_output_weights_index);
  auto recurrent_to_input_weights_tensor =
      has_recurrent_to_input_weights
          ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
          : nullptr; // optional
  auto recurrent_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);

  auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
  auto cell_to_forget_weights_tensor =
      has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
                                 : nullptr; // optional
  auto cell_to_output_weights_tensor =
      has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
                                 : nullptr; // optional

  auto input_gate_bias_tensor =
      has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
  auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
  auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
  auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
  auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
  auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);

  auto projection_weights_tensor = has_projection_weights
                                       ? _tensor_reg->getPortableTensor(projection_weights_index)
                                       : nullptr; // optional
  auto projection_bias_tensor = has_projection_bias
                                    ? _tensor_reg->getPortableTensor(projection_bias_index)
                                    : nullptr; // optional

  IPortableTensor *input_layer_norm_weights_tensor = nullptr;
  IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
  IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
  IPortableTensor *output_layer_norm_weights_tensor = nullptr;
  if (node.getInputs().size() == 24)
  {
    const auto input_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
    const auto forget_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
    const auto cell_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
    const auto output_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};

    input_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
    forget_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
    cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
    output_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
  }

  auto fn = std::make_unique<ops::LSTMLayer>();

  fn->configure(
      input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
      input_to_cell_weights_tensor, input_to_output_weights_tensor,
      recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
      recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
      cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
      input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
      cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
      /*aux_input=*/nullptr,
      /*aux_input_to_input_weights=*/nullptr,
      /*aux_input_to_forget_weights=*/nullptr,
      /*aux_input_to_cell_weights=*/nullptr,
      /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
      cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
      projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
      /*forward_sequence=*/true, time_major,
      /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
      output_tensor,
      !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
      !_ctx.at(cell_state_in_index).info().isVariable());

  _return_fn = std::move(fn);
}

} // namespace cpu
} // namespace backend
} // namespace onert