2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "KernelGenerator.h"
19 #include "ops/AddNLayer.h"
20 #include "ops/ArgMinMaxLayer.h"
21 #include "ops/BatchToSpaceNDLayer.h"
22 #include "ops/BinaryArithmeticLayer.h"
23 #include "ops/CompareLayer.h"
24 #include "ops/ConcatLayer.h"
25 #include "ops/ConvolutionLayer.h"
26 #include "ops/DepthwiseConvolutionLayer.h"
27 #include "ops/EinsumLayer.h"
28 #include "ops/ElementwiseActivationLayer.h"
29 #include "ops/ElementwiseBinaryLayer.h"
30 #include "ops/ElementwiseUnaryLayer.h"
31 #include "ops/ExpandDimsLayer.h"
32 #include "ops/FillLayer.h"
33 #include "ops/FullyConnectedLayer.h"
34 #include "ops/GatherLayer.h"
35 #include "ops/LSTMLayer.h"
36 #include "ops/MeanLayer.h"
37 #include "ops/OneHotLayer.h"
38 #include "ops/OperationUtils.h"
39 #include "ops/PackLayer.h"
40 #include "ops/PadLayer.h"
41 #include "ops/PoolLayer.h"
42 #include "ops/PowLayer.h"
43 #include "ops/RangeLayer.h"
44 #include "ops/RankLayer.h"
45 #include "ops/ReduceLayer.h"
46 #include "ops/ReshapeLayer.h"
47 #include "ops/ResizeBilinearLayer.h"
48 #include "ops/ReverseLayer.h"
49 #include "ops/SelectLayer.h"
50 #include "ops/ShapeLayer.h"
51 #include "ops/SliceLayer.h"
52 #include "ops/SoftMaxLayer.h"
53 #include "ops/StridedSliceLayer.h"
54 #include "ops/SpaceToBatchNDLayer.h"
55 #include "ops/SpaceToDepthLayer.h"
56 #include "ops/SplitLayer.h"
57 #include "ops/SplitVLayer.h"
58 #include "ops/TileLayer.h"
59 #include "ops/TransposeLayer.h"
60 #include "ops/UnpackLayer.h"
61 #include "ops/SquaredDiffLayer.h"
62 #include "ops/L2NormLayer.h"
63 #include "ops/MatrixBandPartLayer.h"
64 #include "ops/BatchMatMulLayer.h"
65 #include "ops/BroadcastToLayer.h"
66 #include "ops/FusedBatchNormLayer.h"
67 #include "ops/LogSoftMaxLayer.h"
68 #include "ops/StatelessRandomUniformLayer.h"
70 #include <backend/Backend.h>
71 #include <backend/IConfig.h>
73 #include <util/Utils.h>
74 #include <util/logging.h>
75 #include <exec/DynamicShapeInferer.h>
89 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
91 switch (arithmetic_type_ir)
93 case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
94 return ops::ArithmeticType::kAdd;
95 case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
96 return ops::ArithmeticType::kSub;
97 case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
98 return ops::ArithmeticType::kMul;
99 case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
100 return ops::ArithmeticType::kDiv;
102 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
106 ops::ElementwiseActivationType
107 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
111 case ir::operation::ElementwiseActivation::Type::LOGISTIC:
112 return ops::ElementwiseActivationType::kLogistic;
113 case ir::operation::ElementwiseActivation::Type::RELU:
114 return ops::ElementwiseActivationType::kReLU;
115 case ir::operation::ElementwiseActivation::Type::TANH:
116 return ops::ElementwiseActivationType::kTanh;
118 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
122 ops::ElementwiseBinaryType
123 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
127 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
128 return ops::ElementwiseBinaryType::kLogicalOr;
129 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
130 return ops::ElementwiseBinaryType::kMax;
131 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
132 return ops::ElementwiseBinaryType::kMin;
134 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
138 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
142 case ir::operation::ElementwiseUnary::Type::ABS:
143 return ops::ElementwiseUnaryType::kAbs;
144 case ir::operation::ElementwiseUnary::Type::CAST:
145 return ops::ElementwiseUnaryType::kCast;
146 case ir::operation::ElementwiseUnary::Type::COS:
147 return ops::ElementwiseUnaryType::kCos;
148 case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
149 return ops::ElementwiseUnaryType::kDequantize;
150 case ir::operation::ElementwiseUnary::Type::ERF:
151 return ops::ElementwiseUnaryType::kErf;
152 case ir::operation::ElementwiseUnary::Type::EXP:
153 return ops::ElementwiseUnaryType::kExp;
154 case ir::operation::ElementwiseUnary::Type::FLOOR:
155 return ops::ElementwiseUnaryType::kFloor;
156 case ir::operation::ElementwiseUnary::Type::LOG:
157 return ops::ElementwiseUnaryType::kLog;
158 case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
159 return ops::ElementwiseUnaryType::kLogicalNot;
160 case ir::operation::ElementwiseUnary::Type::NEG:
161 return ops::ElementwiseUnaryType::kNeg;
162 case ir::operation::ElementwiseUnary::Type::QUANTIZE:
163 return ops::ElementwiseUnaryType::kQuantize;
164 case ir::operation::ElementwiseUnary::Type::ROUND:
165 return ops::ElementwiseUnaryType::kRound;
166 case ir::operation::ElementwiseUnary::Type::RSQRT:
167 return ops::ElementwiseUnaryType::kRSqrt;
168 case ir::operation::ElementwiseUnary::Type::SIN:
169 return ops::ElementwiseUnaryType::kSin;
170 case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
171 return ops::ElementwiseUnaryType::kZerosLike;
173 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
177 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
181 case ir::operation::Pool2D::PoolType::AVG:
182 return ops::PoolType::kAvg;
183 case ir::operation::Pool2D::PoolType::MAX:
184 return ops::PoolType::kMax;
186 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
190 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
192 switch (reduce_type_ir)
194 case ir::operation::Reduce::ReduceType::ALL:
195 return ops::ReduceType::kAll;
196 case ir::operation::Reduce::ReduceType::ANY:
197 return ops::ReduceType::kAny;
198 case ir::operation::Reduce::ReduceType::MAX:
199 return ops::ReduceType::kMax;
200 case ir::operation::Reduce::ReduceType::MIN:
201 return ops::ReduceType::kMin;
202 case ir::operation::Reduce::ReduceType::PROD:
203 return ops::ReduceType::kProd;
204 case ir::operation::Reduce::ReduceType::SUM:
205 return ops::ReduceType::kSum;
207 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
212 KernelGenerator::KernelGenerator(
213 const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
214 const std::shared_ptr<TensorBuilder> &tensor_builder,
215 const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
216 const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
217 const std::shared_ptr<ExternalContext> &external_context)
218 : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
219 _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
220 _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
225 void KernelGenerator::visit(const ir::operation::AddN &node)
227 const auto output_index{node.getOutputs().at(0)};
229 std::vector<const IPortableTensor *> input_tensors;
230 for (auto &input_idx : node.getInputs())
231 input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
233 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
235 auto fn = std::make_unique<ops::AddNLayer>();
237 fn->configure(std::move(input_tensors), output_tensor);
239 _return_fn = std::move(fn);
242 void KernelGenerator::visit(const ir::OpSequence &op_seq)
244 assert(!_return_fn_seq);
245 assert(_tensor_builder->dynamicTensorManager());
248 auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
250 _return_fn_seq = std::make_unique<exec::FunctionSequence>();
252 // Prepare to handle dynamic tensors later
253 auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
255 dyn_ctx->op_seq = &op_seq;
256 dyn_ctx->operations = &_operations_ctx;
257 dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
258 dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
260 _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
263 _current_op_seq_layout = op_seq.getLayout();
264 for (const auto &operation_idx : op_seq.operations())
266 const auto &node = _operations_ctx.at(operation_idx);
268 _return_fn_seq->append(releaseFunction());
270 for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
272 auto portable_tensor = _tensor_reg->getPortableTensor(ind);
275 assert(portable_tensor->layout() == ir::Layout::NHWC);
278 auto tensor = _tensor_reg->getNativeTensor(ind);
281 tensor->increase_ref();
287 void KernelGenerator::visit(const ir::operation::Conv2D &node)
289 using ir::operation::Conv2D;
291 const auto ofm_index{node.getOutputs().at(0)};
292 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
293 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
294 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
296 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
297 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
298 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
299 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
301 const auto stride = node.param().stride;
302 const auto activation = node.param().activation;
303 const auto param_padding = node.param().padding;
304 const auto dilation = node.param().dilation;
305 auto fn = std::make_unique<ops::ConvolutionLayer>();
307 if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
309 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
310 param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
311 stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
312 activation, ofm_tensor);
314 _return_fn = std::move(fn);
317 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
318 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
319 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
320 const auto &ker_shape = _ctx.at(ker_index).shape();
321 const auto ker_height = ker_shape.dim(1);
322 const auto ker_width = ker_shape.dim(2);
325 ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
326 dilation.width_factor, dilation.height_factor);
328 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
329 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
330 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
332 _return_fn = std::move(fn);
335 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
337 using ir::operation::DepthwiseConv2D;
339 const auto ofm_index{node.getOutputs().at(0)};
340 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
341 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
342 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
344 const auto stride = node.param().stride;
345 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
346 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
347 // Kernel format is [1, kernel_height, kernel_width, depth_out].
348 const auto &ker_shape = _ctx.at(ker_index).shape();
349 const auto ker_height = ker_shape.dim(1);
350 const auto ker_width = ker_shape.dim(2);
351 const auto dilation_width = node.param().dilation.width_factor;
352 const auto dilation_height = node.param().dilation.height_factor;
353 const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
354 ker_width, ker_height, dilation_width, dilation_height);
355 const auto multiplier = node.param().multiplier;
356 const auto activation = node.param().activation;
358 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
359 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
360 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
361 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
363 auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
365 fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
366 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
367 dilation_height, activation, ofm_tensor);
369 _return_fn = std::move(fn);
372 void KernelGenerator::visit(const ir::operation::Concat &node)
374 const auto ofm_index{node.getOutputs().at(0)};
376 const auto rank = _ctx.at(ofm_index).shape().rank();
377 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
379 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
381 std::vector<const IPortableTensor *> input_tensors;
382 for (auto &ifm_idx : node.getInputs())
383 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
385 auto fn = std::make_unique<ops::ConcatLayer>();
387 fn->configure(input_tensors, axis, output_tensor);
389 _return_fn = std::move(fn);
392 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
394 const auto output_index{node.getOutputs().at(0)};
395 const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
396 const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
398 auto output_alloc = _tensor_reg->getPortableTensor(output_index);
399 auto input_alloc = _tensor_reg->getPortableTensor(input_index);
400 auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
402 auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
404 IPortableTensor *crops_alloc = nullptr;
405 const auto NNApiInputs = 2;
407 if (node.getInputs().size() != NNApiInputs)
409 const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
410 crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
413 fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
415 _return_fn = std::move(fn);
418 void KernelGenerator::visit(const ir::operation::Fill &node)
420 const auto output_index{node.getOutputs().at(0)};
421 const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
422 const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
424 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
425 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
426 auto value_tensor = _tensor_reg->getPortableTensor(value_index);
428 auto fn = std::make_unique<ops::FillLayer>();
430 fn->configure(input_tensor, value_tensor, output_tensor);
432 _return_fn = std::move(fn);
435 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
437 using ir::operation::FullyConnected;
439 const auto output_index{node.getOutputs().at(0)};
440 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
441 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
442 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
443 const auto activation = node.param().activation;
444 const auto weights_format = node.param().weights_format;
446 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
447 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
448 auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
449 auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
451 auto fn = std::make_unique<ops::FullyConnectedLayer>();
453 fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
456 _return_fn = std::move(fn);
459 void KernelGenerator::visit(const ir::operation::Reshape &node)
461 const auto output_index{node.getOutputs().at(0)};
462 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
464 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
465 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
467 // optional 2nd input
468 IPortableTensor *shape_tensor = nullptr;
470 if (node.getInputs().size() == 2)
472 const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
473 shape_tensor = _tensor_reg->getPortableTensor(shape_index);
476 auto fn = std::make_unique<ops::ReshapeLayer>();
478 fn->configure(input_tensor, shape_tensor, output_tensor);
479 _return_fn = std::move(fn);
482 void KernelGenerator::visit(const ir::operation::Squeeze &node)
484 const auto output_index{node.getOutputs().at(0)};
485 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
487 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
488 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
490 // Squeeze can share same kernel with reshape
491 auto fn = std::make_unique<ops::ReshapeLayer>();
493 fn->configure(input_tensor, nullptr, output_tensor);
495 _return_fn = std::move(fn);
498 void KernelGenerator::visit(const ir::operation::Softmax &node)
500 const auto output_index{node.getOutputs().at(0)};
501 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
503 const auto beta = node.param().beta;
505 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
506 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
508 auto fn = std::make_unique<ops::SoftMaxLayer>();
510 fn->configure(input_tensor, beta, output_tensor);
512 _return_fn = std::move(fn);
515 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
517 const auto ofm_index{node.getOutputs().at(0)};
518 const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
519 const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
521 const auto activation = node.param().activation;
523 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
524 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
525 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
527 auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
529 fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
530 convertArithmeticType(node.param().arithmetic_type));
532 _return_fn = std::move(fn);
535 void KernelGenerator::visit(const ir::operation::Comparison &node)
537 const auto ofm_index{node.getOutputs().at(0)};
538 const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
539 const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
541 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
542 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
543 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
545 auto comparison_type = node.param().comparison_type;
547 auto fn = std::make_unique<ops::CompareLayer>();
549 fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
551 _return_fn = std::move(fn);
554 void KernelGenerator::visit(const ir::operation::Gather &node)
556 const auto output_index{node.getOutputs().at(0)};
557 const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
558 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
560 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
561 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
562 auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
564 const auto backend_layout = output_tensor->layout();
565 UNUSED_RELEASE(backend_layout);
567 // NOTE The frontend layout and backend layout must be the same for this operation.
568 // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
569 // is not not efficient even if it works well. If so, it would be better to set the
570 // layout of these backend tensors to the same layout.
571 // There is also one thing we have to think about. This operation depends on the layout of
572 // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
573 // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
574 // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
575 assert(backend_layout == input_tensor->layout());
576 assert(backend_layout == indices_tensor->layout());
577 const auto &input_shape = _ctx.at(input_index).shape();
578 UNUSED_RELEASE(input_shape);
579 assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
581 const auto axis_raw = node.param().axis;
582 const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
584 auto fn = std::make_unique<ops::GatherLayer>();
586 fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
588 _return_fn = std::move(fn);
591 void KernelGenerator::visit(const ir::operation::OneHot &node)
593 const auto output_index{node.getOutputs().at(0)};
594 const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
595 const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
596 const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
597 const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
599 const auto axis = node.param().axis;
601 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
602 auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
603 auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
604 auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
605 auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
607 assert(indices_tensor->data_type() == OperandType::INT32);
608 assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
610 auto fn = std::make_unique<ops::OneHotLayer>();
612 fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
614 _return_fn = std::move(fn);
617 void KernelGenerator::visit(const ir::operation::Einsum &node)
619 const auto ofm_index{node.getOutputs().at(0)};
621 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
622 std::vector<const IPortableTensor *> input_tensors;
623 for (auto &ifm_idx : node.getInputs())
624 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
626 const auto equation = node.param().equation;
628 auto fn = std::make_unique<ops::EinsumLayer>();
630 fn->configure(input_tensors, equation, output_tensor);
632 _return_fn = std::move(fn);
635 void KernelGenerator::visit(const ir::operation::Custom &node)
637 auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
638 std::vector<custom::TypeInfo> &types,
639 std::vector<IPortableTensor *> &tensors) {
640 for (auto &idx : opSeq)
642 const auto &operand = _ctx.at(idx);
643 // TODO make sure using `_current_op_seq_layout` is correct for custom operations
644 types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
645 auto in_tensor = _tensor_reg->getPortableTensor(idx);
646 tensors.emplace_back(in_tensor);
650 backend::custom::CustomKernelConfigParams params{};
652 fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
653 fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
655 params.userdata = node.userdata().data;
656 params.userdata_size = node.userdata().size;
658 auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
660 _return_fn = std::move(fn);
663 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
665 const auto output_index{node.getOutputs().at(0)};
666 const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
668 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
669 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
671 auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
673 fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
674 convertElementwiseActivationType(node.param().op_type));
676 _return_fn = std::move(fn);
679 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
681 const auto output_index{node.getOutputs().at(0)};
682 const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
683 const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
685 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
686 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
687 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
689 auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
691 fn->configure(lhs_tensor, rhs_tensor, output_tensor,
692 convertElementwiseBinaryType(node.param().op_type));
694 _return_fn = std::move(fn);
697 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
699 const auto output_index{node.getOutputs().at(0)};
700 const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
702 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
703 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
705 auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
707 fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
709 _return_fn = std::move(fn);
712 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
714 const auto output_index{node.getOutputs().at(0)};
715 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
716 const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
718 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
719 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
720 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
722 auto fn = std::make_unique<ops::ExpandDimsLayer>();
724 fn->configure(input_tensor, axis_tensor, output_tensor);
726 _return_fn = std::move(fn);
729 void KernelGenerator::visit(const ir::operation::Pack &node)
731 const auto ofm_index{node.getOutputs().at(0)};
733 const auto rank = _ctx.at(ofm_index).shape().rank();
734 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
736 assert(-rank <= axis && axis < rank);
738 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
740 std::vector<const IPortableTensor *> input_tensors;
741 for (auto &ifm_idx : node.getInputs())
742 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
744 auto fn = std::make_unique<ops::PackLayer>();
746 fn->configure(input_tensors, axis, output_tensor);
748 _return_fn = std::move(fn);
751 void KernelGenerator::visit(const ir::operation::Unpack &node)
753 const auto input_index{node.getInputs().at(0)};
755 const auto rank = _ctx.at(input_index).shape().rank();
756 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
758 assert(rank == 0 || (-rank <= axis && axis < rank));
760 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
762 std::vector<IPortableTensor *> output_tensors;
763 for (auto &output_idx : node.getOutputs())
764 output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
766 auto fn = std::make_unique<ops::UnpackLayer>();
768 uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
770 fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
772 _return_fn = std::move(fn);
775 void KernelGenerator::visit(const ir::operation::Pad &node)
777 const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
778 const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
779 const auto output_index{node.getOutputs().at(0)};
780 assert(_ctx.at(pad_index).data());
782 auto input = _tensor_reg->getPortableTensor(input_index);
783 auto output = _tensor_reg->getPortableTensor(output_index);
784 auto pad_rank = _ctx.at(pad_index).shape().dim(0);
785 auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
787 auto fn = std::make_unique<ops::PadLayer>();
789 bool isPadV2 = node.getInputs().size() == 3 ? true : false;
790 const void *value = nullptr;
794 const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
795 value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
798 fn->configure(input, output, pad_base, pad_rank, value);
799 _return_fn = std::move(fn);
802 void KernelGenerator::visit(const ir::operation::Transpose &node)
804 const auto output_index{node.getOutputs().at(0)};
805 const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
806 const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
808 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
809 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
810 auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
812 auto fn = std::make_unique<ops::TransposeLayer>();
814 fn->configure(input_tensor, perm_tensor, output_tensor);
816 _return_fn = std::move(fn);
819 void KernelGenerator::visit(const ir::operation::Reduce &node)
821 const auto output_index{node.getOutputs().at(0)};
822 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
823 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
825 const auto keep_dims = node.param().keep_dims;
826 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
827 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
828 auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
830 if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
832 auto fn = std::make_unique<ops::MeanLayer>();
834 fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
836 _return_fn = std::move(fn);
840 auto fn = std::make_unique<ops::ReduceLayer>();
842 const auto reduce_type = convertReduceType(node.param().reduce_type);
843 fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
845 _return_fn = std::move(fn);
849 void KernelGenerator::visit(const ir::operation::Select &node)
851 const auto output_index{node.getOutputs().at(0)};
852 const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
853 const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
854 const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
856 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
857 auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
858 auto true_tensor = _tensor_reg->getPortableTensor(true_index);
859 auto false_tensor = _tensor_reg->getPortableTensor(false_index);
861 auto fn = std::make_unique<ops::SelectLayer>();
863 fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
865 _return_fn = std::move(fn);
868 void KernelGenerator::visit(const ir::operation::Slice &node)
870 const auto output_index{node.getOutputs().at(0)};
871 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
872 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
873 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
875 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
876 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
877 auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
878 auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
880 auto fn = std::make_unique<ops::SliceLayer>();
882 fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
884 _return_fn = std::move(fn);
887 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
889 const auto output_index{node.getOutputs().at(0)};
890 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
891 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
892 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
893 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
895 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
896 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
897 auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
898 auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
899 auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
901 auto begin_mask = node.param().begin_mask;
902 auto end_mask = node.param().end_mask;
903 auto shrink_axis_mask = node.param().shrink_axis_mask;
905 auto fn = std::make_unique<ops::StridedSliceLayer>();
907 fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
908 end_mask, shrink_axis_mask);
910 _return_fn = std::move(fn);
913 void KernelGenerator::visit(const ir::operation::Split &node)
915 const auto num_splits = node.param().num_splits;
916 assert(num_splits == static_cast<int>(node.getOutputs().size()));
918 const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
919 const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
921 auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
922 auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
924 std::vector<IPortableTensor *> out_tensors;
925 for (auto &output_idx : node.getOutputs())
926 out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
928 auto fn = std::make_unique<ops::SplitLayer>();
930 fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
932 _return_fn = std::move(fn);
935 void KernelGenerator::visit(const ir::operation::Shape &node)
937 const auto ofm_index{node.getOutputs().at(0)};
938 const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
940 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
941 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
943 auto fn = std::make_unique<ops::ShapeLayer>();
945 fn->configure(ifm_tensor, ofm_tensor);
947 _return_fn = std::move(fn);
950 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
952 const auto output_index{node.getOutputs().at(0)};
953 const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
955 auto align_corners = node.param().align_corners;
956 auto half_pixel_centers = node.param().half_pixel_centers;
958 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
959 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
961 auto fn = std::make_unique<ops::ResizeBilinearLayer>();
963 if (node.getInputs().size() == 1)
965 fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
966 align_corners, half_pixel_centers);
970 assert(node.getInputs().size() == 2);
971 const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
972 auto size_tensor = _tensor_reg->getPortableTensor(size_index);
973 if (size_tensor->is_constant())
975 auto size_vec = _ctx.at(size_index).asVector<int32_t>();
976 const auto height_out = size_vec[0];
977 const auto width_out = size_vec[1];
978 fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
983 fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
987 _return_fn = std::move(fn);
990 void KernelGenerator::visit(const ir::operation::Reverse &node)
992 const auto output_index{node.getOutputs().at(0)};
993 const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
994 const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
996 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
997 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
998 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1000 auto fn = std::make_unique<ops::ReverseLayer>();
1002 fn->configure(input_tensor, axis_tensor, output_tensor);
1004 _return_fn = std::move(fn);
1007 void KernelGenerator::visit(const ir::operation::ArgMax &node)
1009 const auto output_index{node.getOutputs().at(0)};
1010 const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
1011 const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
1013 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1014 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1015 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1017 auto fn = std::make_unique<ops::ArgMinMaxLayer>();
1019 fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
1021 _return_fn = std::move(fn);
1024 void KernelGenerator::visit(const ir::operation::Pool2D &node)
1026 const auto ofm_index{node.getOutputs().at(0)};
1027 const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
1029 const auto kh = node.param().kh;
1030 const auto kw = node.param().kw;
1031 const auto stride = node.param().stride;
1032 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
1033 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
1034 const auto padding =
1035 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
1036 const auto activation = node.param().activation;
1038 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1039 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1041 auto fn = std::make_unique<ops::PoolLayer>();
1043 fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1044 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1045 convertPoolType(node.param().op_type));
1047 _return_fn = std::move(fn);
1050 void KernelGenerator::visit(const ir::operation::Pow &node)
1052 const auto output_index{node.getOutputs().at(0)};
1053 const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1054 const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1056 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1057 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1058 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1060 auto fn = std::make_unique<ops::PowLayer>();
1062 fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1064 _return_fn = std::move(fn);
1067 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1069 const auto output_index{node.getOutputs().at(0)};
1070 const auto input_index{node.getInputs().at(0)};
1072 auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1073 auto input_alloc = _tensor_reg->getPortableTensor(input_index);
1075 auto fn = std::make_unique<ops::L2NormLayer>();
1077 fn->configure(input_alloc, output_alloc);
1079 _return_fn = std::move(fn);
1082 void KernelGenerator::visit(const ir::operation::Range &node)
1084 const auto output_index{node.getOutputs().at(0)};
1085 const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1086 const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1087 const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1089 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1090 auto start_tensor = _tensor_reg->getPortableTensor(start_index);
1091 auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
1092 auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
1094 auto fn = std::make_unique<ops::RangeLayer>();
1096 fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1097 _return_fn = std::move(fn);
1100 void KernelGenerator::visit(const ir::operation::Rank &node)
1102 const auto ofm_index{node.getOutputs().at(0)};
1103 const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1105 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1106 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1108 auto fn = std::make_unique<ops::RankLayer>();
1110 fn->configure(ifm_tensor, ofm_tensor);
1112 _return_fn = std::move(fn);
1115 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1117 const auto ofm_index{node.getOutputs().at(0)};
1118 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1119 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1121 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1122 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1123 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1125 auto fn = std::make_unique<ops::SqDiffLayer>();
1127 fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1128 _return_fn = std::move(fn);
1131 void KernelGenerator::visit(const ir::operation::Tile &node)
1133 const auto output_index{node.getOutputs().at(0)};
1134 const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1135 const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1137 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1138 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1139 auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
1141 auto fn = std::make_unique<ops::TileLayer>();
1143 fn->configure(input_tensor, multiples_tensor, output_tensor);
1144 _return_fn = std::move(fn);
1147 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1149 const auto output_index{node.getOutputs().at(0)};
1150 const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1151 const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1152 const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1154 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1155 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1156 auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
1157 auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
1159 auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1161 fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1162 _return_fn = std::move(fn);
1165 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1167 const auto output_index{node.getOutputs().at(0)};
1168 const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1169 const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1171 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1172 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1173 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1175 const auto adj_x = node.param().adj_x;
1176 const auto adj_y = node.param().adj_y;
1178 auto fn = std::make_unique<ops::BatchMatMulLayer>();
1180 fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1181 _return_fn = std::move(fn);
1184 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1186 const auto output_index{node.getOutputs().at(0)};
1187 const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1188 const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1190 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1191 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1192 auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1194 auto fn = std::make_unique<ops::BroadcastToLayer>();
1196 fn->configure(input_tensor, shape_tensor, output_tensor);
1198 _return_fn = std::move(fn);
1201 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1203 const auto ofm_index{node.getOutputs().at(0)};
1205 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
1206 std::vector<const IPortableTensor *> input_tensors;
1207 for (auto &ifm_idx : node.getInputs())
1208 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
1210 const auto epsilon = node.param().epsilon;
1211 const auto is_training = node.param().is_training;
1212 const auto data_format = node.param().data_format;
1214 auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1216 fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1218 _return_fn = std::move(fn);
1221 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1223 const auto output_index{node.getOutputs().at(0)};
1224 const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1226 const auto beta = node.param().beta;
1227 const auto axis = node.param().axis;
1229 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1230 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1232 auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1234 fn->configure(input_tensor, beta, axis, output_tensor);
1236 _return_fn = std::move(fn);
1239 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1241 const auto output_index{node.getOutputs().at(0)};
1242 const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1243 const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1244 const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1246 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1247 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1248 auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
1249 auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
1251 auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1253 fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1255 _return_fn = std::move(fn);
1258 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1260 const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1261 const auto output_index{node.getOutputs().at(0)};
1262 auto block_size = node.param().block_size;
1264 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1265 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1267 auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1269 fn->configure(input_tensor, block_size, output_tensor);
1270 _return_fn = std::move(fn);
1273 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1275 const auto output_index{node.getOutputs().at(0)};
1276 const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1277 const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1279 auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1280 auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
1281 auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);
1283 auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1285 fn->configure(shape_alloc, seed_alloc, output_alloc);
1286 _return_fn = std::move(fn);
1289 void KernelGenerator::visit(const ir::operation::SplitV &node)
1291 const auto num_splits = node.param().num_splits;
1292 assert(num_splits == static_cast<int>(node.getOutputs().size()));
1294 const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1295 const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1296 const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1298 auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
1299 auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
1300 auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
1302 std::vector<IPortableTensor *> out_tensors;
1303 for (auto &output_idx : node.getOutputs())
1304 out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
1306 auto fn = std::make_unique<ops::SplitVLayer>();
1308 fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1310 _return_fn = std::move(fn);
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  // --- Output operand indices -----------------------------------------------
  const auto scratch_buffer_index{
      node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  // --- Input operand indices (several are optional, see notes below) --------
  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

  // --- Optionality flags -----------------------------------------------------
  // An optional operand is treated as "present" only if it exists in the
  // operand context AND has a non-empty shape (dim(0)/dim(1) != 0).
  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
  // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
  // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
  bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
                                    (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
  bool has_recurrent_to_input_weights =
      _ctx.exist(recurrent_to_input_weights_index) &&
      (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
       _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);

  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
  // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
  // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
  bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
                                    _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
                                    _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;

  bool has_input_gate_bias =
      _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);

  bool has_projection_weights = _ctx.exist(projection_weights_index) &&
                                (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
                                 _ctx.at(projection_weights_index).shape().dim(1) != 0);
  bool has_projection_bias =
      _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);

  // --- Tensor lookups (nullptr for absent optional operands) -----------------
  auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
                                   ? _tensor_reg->getPortableTensor(scratch_buffer_index)
                                   : nullptr; // optional
  auto output_state_out_tensor = _ctx.exist(output_state_out_index)
                                     ? _tensor_reg->getPortableTensor(output_state_out_index)
                                     : nullptr; // optional
  auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
                                   ? _tensor_reg->getPortableTensor(cell_state_out_index)
                                   : nullptr; // optional
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto input_to_input_weights_tensor =
      has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
                                 : nullptr; // optional
  auto input_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_output_weights_index);
  auto recurrent_to_input_weights_tensor =
      has_recurrent_to_input_weights
          ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
          : nullptr; // optional
  auto recurrent_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);

  auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
  auto cell_to_forget_weights_tensor =
      has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
                                 : nullptr; // optional
  auto cell_to_output_weights_tensor =
      has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
                                 : nullptr; // optional

  auto input_gate_bias_tensor =
      has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
  auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
  auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
  auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
  auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
  auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);

  auto projection_weights_tensor = has_projection_weights
                                       ? _tensor_reg->getPortableTensor(projection_weights_index)
                                       : nullptr; // optional
  auto projection_bias_tensor = has_projection_bias
                                    ? _tensor_reg->getPortableTensor(projection_bias_index)
                                    : nullptr; // optional

  // --- Layer normalization weights (only in the 24-input variant) ------------
  IPortableTensor *input_layer_norm_weights_tensor = nullptr;
  IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
  IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
  IPortableTensor *output_layer_norm_weights_tensor = nullptr;
  if (node.getInputs().size() == 24)
  {
    const auto input_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
    const auto forget_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
    const auto cell_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
    const auto output_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};

    input_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
    forget_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
    cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
    output_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
  }

  // --- Kernel construction ----------------------------------------------------
  // Aux-input arguments are unused by this backend and passed as nullptr.
  auto fn = std::make_unique<ops::LSTMLayer>();

  fn->LSTMLayer::configure(
      input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
      input_to_cell_weights_tensor, input_to_output_weights_tensor,
      recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
      recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
      cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
      input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
      cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
      /*aux_input=*/nullptr,
      /*aux_input_to_input_weights=*/nullptr,
      /*aux_input_to_forget_weights=*/nullptr,
      /*aux_input_to_cell_weights=*/nullptr,
      /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
      cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
      projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
      /*forward_sequence=*/true, time_major,
      /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
      output_tensor,
      !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
      !_ctx.at(cell_state_in_index).info().isVariable());

  _return_fn = std::move(fn);
}
1498 } // namespace backend
1499 } // namespace onert