runtime/onert/backend/cpu/KernelGenerator.cc

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "KernelGenerator.h"
  18
  19 #include "ops/ArgMinMaxLayer.h"
  20 #include "ops/BatchToSpaceNDLayer.h"
  21 #include "ops/BinaryArithmeticLayer.h"
  22 #include "ops/CompareLayer.h"
  23 #include "ops/ConcatLayer.h"
  24 #include "ops/ConvolutionLayer.h"
  25 #include "ops/DepthwiseConvolutionLayer.h"
  26 #include "ops/EinsumLayer.h"
  27 #include "ops/ElementwiseActivationLayer.h"
  28 #include "ops/ElementwiseBinaryLayer.h"
  29 #include "ops/ElementwiseUnaryLayer.h"
  30 #include "ops/ExpandDimsLayer.h"
  31 #include "ops/FillLayer.h"
  32 #include "ops/FullyConnectedLayer.h"
  33 #include "ops/GatherLayer.h"
  34 #include "ops/MeanLayer.h"
  35 #include "ops/OneHotLayer.h"
  36 #include "ops/OperationUtils.h"
  37 #include "ops/PackLayer.h"
  38 #include "ops/PadLayer.h"
  39 #include "ops/PoolLayer.h"
  40 #include "ops/PowLayer.h"
  41 #include "ops/RangeLayer.h"
  42 #include "ops/RankLayer.h"
  43 #include "ops/ReduceLayer.h"
  44 #include "ops/ReshapeLayer.h"
  45 #include "ops/ResizeBilinearLayer.h"
  46 #include "ops/ReverseLayer.h"
  47 #include "ops/SelectLayer.h"
  48 #include "ops/ShapeLayer.h"
  49 #include "ops/SliceLayer.h"
  50 #include "ops/SoftMaxLayer.h"
  51 #include "ops/StridedSliceLayer.h"
  52 #include "ops/SpaceToBatchNDLayer.h"
  53 #include "ops/SpaceToDepthLayer.h"
  54 #include "ops/SplitLayer.h"
  55 #include "ops/SplitVLayer.h"
  56 #include "ops/TileLayer.h"
  57 #include "ops/TransposeLayer.h"
  58 #include "ops/UnpackLayer.h"
  59 #include "ops/SquaredDiffLayer.h"
  60 #include "ops/L2NormLayer.h"
  61 #include "ops/MatrixBandPartLayer.h"
  62 #include "ops/BatchMatMulLayer.h"
  63 #include "ops/BroadcastToLayer.h"
  64 #include "ops/FusedBatchNormLayer.h"
  65 #include "ops/LogSoftMaxLayer.h"
  66 #include "ops/StatelessRandomUniformLayer.h"
  67
  68 #include <backend/Backend.h>
  69 #include <backend/IConfig.h>
  70 #include <memory>
  71 #include <util/Utils.h>
  72 #include <util/logging.h>
  73 #include <exec/DynamicShapeInference.h>
  74
  75 #include <stdexcept>
  76
  77 namespace onert
  78 {
  79 namespace backend
  80 {
  81 namespace cpu
  82 {
  83
  84 namespace
  85 {
  86 ops::ArithmeticType
  87 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
  88 {
  89   switch (arithmetic_type_ir)
  90   {
  91     case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
  92       return ops::ArithmeticType::kAdd;
  93     case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
  94       return ops::ArithmeticType::kSub;
  95     case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
  96       return ops::ArithmeticType::kMul;
  97     case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
  98       return ops::ArithmeticType::kDiv;
  99     default:
 100       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
 101   }
 102 }
 103
 104 ops::ElementwiseActivationType
 105 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
 106 {
 107   switch (type_ir)
 108   {
 109     case ir::operation::ElementwiseActivation::Type::LOGISTIC:
 110       return ops::ElementwiseActivationType::kLogistic;
 111     case ir::operation::ElementwiseActivation::Type::RELU:
 112       return ops::ElementwiseActivationType::kReLU;
 113     case ir::operation::ElementwiseActivation::Type::TANH:
 114       return ops::ElementwiseActivationType::kTanh;
 115     default:
 116       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
 117   }
 118 }
 119
 120 ops::ElementwiseBinaryType
 121 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
 122 {
 123   switch (type_ir)
 124   {
 125     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
 126       return ops::ElementwiseBinaryType::kLogicalOr;
 127     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
 128       return ops::ElementwiseBinaryType::kMax;
 129     case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
 130       return ops::ElementwiseBinaryType::kMin;
 131     default:
 132       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
 133   }
 134 }
 135
 136 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
 137 {
 138   switch (type_ir)
 139   {
 140     case ir::operation::ElementwiseUnary::Type::ABS:
 141       return ops::ElementwiseUnaryType::kAbs;
 142     case ir::operation::ElementwiseUnary::Type::CAST:
 143       return ops::ElementwiseUnaryType::kCast;
 144     case ir::operation::ElementwiseUnary::Type::COS:
 145       return ops::ElementwiseUnaryType::kCos;
 146     case ir::operation::ElementwiseUnary::Type::ERF:
 147       return ops::ElementwiseUnaryType::kErf;
 148     case ir::operation::ElementwiseUnary::Type::EXP:
 149       return ops::ElementwiseUnaryType::kExp;
 150     case ir::operation::ElementwiseUnary::Type::LOG:
 151       return ops::ElementwiseUnaryType::kLog;
 152     case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
 153       return ops::ElementwiseUnaryType::kLogicalNot;
 154     case ir::operation::ElementwiseUnary::Type::NEG:
 155       return ops::ElementwiseUnaryType::kNeg;
 156     case ir::operation::ElementwiseUnary::Type::QUANTIZE:
 157       return ops::ElementwiseUnaryType::kQuantize;
 158     case ir::operation::ElementwiseUnary::Type::ROUND:
 159       return ops::ElementwiseUnaryType::kRound;
 160     case ir::operation::ElementwiseUnary::Type::RSQRT:
 161       return ops::ElementwiseUnaryType::kRSqrt;
 162     case ir::operation::ElementwiseUnary::Type::SIN:
 163       return ops::ElementwiseUnaryType::kSin;
 164     case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
 165       return ops::ElementwiseUnaryType::kZerosLike;
 166     default:
 167       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
 168   }
 169 }
 170
 171 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
 172 {
 173   switch (type_ir)
 174   {
 175     case ir::operation::Pool2D::PoolType::AVG:
 176       return ops::PoolType::kAvg;
 177     case ir::operation::Pool2D::PoolType::MAX:
 178       return ops::PoolType::kMax;
 179     default:
 180       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
 181   }
 182 }
 183
 184 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 185 {
 186   switch (reduce_type_ir)
 187   {
 188     case ir::operation::Reduce::ReduceType::ALL:
 189       return ops::ReduceType::kAll;
 190     case ir::operation::Reduce::ReduceType::ANY:
 191       return ops::ReduceType::kAny;
 192     case ir::operation::Reduce::ReduceType::MAX:
 193       return ops::ReduceType::kMax;
 194     case ir::operation::Reduce::ReduceType::MIN:
 195       return ops::ReduceType::kMin;
 196     case ir::operation::Reduce::ReduceType::PROD:
 197       return ops::ReduceType::kProd;
 198     case ir::operation::Reduce::ReduceType::SUM:
 199       return ops::ReduceType::kSum;
 200     default:
 201       throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
 202   }
 203 }
 204 } // namespace
 205
 206 KernelGenerator::KernelGenerator(
 207     const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
 208     const std::shared_ptr<TensorBuilder> &tensor_builder,
 209     const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
 210     const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
 211     const std::shared_ptr<ExternalContext> &external_context)
 212     : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
 213       _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
 214       _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
 215 {
 216   // DO NOTHING
 217 }
 218
 219 void KernelGenerator::visit(const ir::OpSequence &op_seq)
 220 {
 221   assert(!_return_fn_seq);
 222   assert(_tensor_builder->dynamicTensorManager());
 223   assert(_tensor_reg);
 224
 225   auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
 226
 227   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
 228
 229   // Prepare to handle dynamic tensors later
 230   auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
 231   {
 232     dyn_ctx->op_seq = &op_seq;
 233     dyn_ctx->operations = &_operations_ctx;
 234     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
 235     dyn_ctx->tensor_registry = _tensor_reg;
 236     dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
 237
 238     _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
 239   }
 240   _return_fn_seq->enableDynamicShapeInferer(true);
 241
 242   _current_op_seq_layout = op_seq.getLayout();
 243   for (const auto &operation_idx : op_seq.operations())
 244   {
 245     const auto &node = _operations_ctx.at(operation_idx);
 246     node.accept(*this);
 247     _return_fn_seq->append(releaseFunction());
 248
 249     for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
 250     {
 251       auto portable_tensor = _tensor_reg->getPortableTensor(ind);
 252       if (portable_tensor)
 253       {
 254         assert(portable_tensor->layout() == ir::Layout::NHWC);
 255       }
 256
 257       auto tensor = _tensor_reg->getNativeTensor(ind);
 258       if (tensor)
 259       {
 260         tensor->increase_ref();
 261       }
 262     }
 263   }
 264 }
 265
 266 void KernelGenerator::visit(const ir::operation::Conv2D &node)
 267 {
 268   using ir::operation::Conv2D;
 269
 270   const auto ofm_index{node.getOutputs().at(0)};
 271   const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
 272   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
 273   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
 274
 275   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 276   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 277   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
 278   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 279
 280   const auto stride = node.param().stride;
 281   const auto activation = node.param().activation;
 282   const auto param_padding = node.param().padding;
 283   const auto dilation = node.param().dilation;
 284   auto fn = std::make_unique<ops::ConvolutionLayer>();
 285
 286   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
 287   {
 288     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
 289                   param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
 290                   stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
 291                   activation, ofm_tensor);
 292
 293     _return_fn = std::move(fn);
 294     return;
 295   }
 296   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
 297   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
 298   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
 299   const auto &ker_shape = _ctx.at(ker_index).shape();
 300   const auto ker_height = ker_shape.dim(1);
 301   const auto ker_width = ker_shape.dim(2);
 302
 303   const auto padding =
 304       ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
 305                            dilation.width_factor, dilation.height_factor);
 306
 307   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
 308                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
 309                 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
 310
 311   _return_fn = std::move(fn);
 312 }
 313
 314 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
 315 {
 316   using ir::operation::DepthwiseConv2D;
 317
 318   const auto ofm_index{node.getOutputs().at(0)};
 319   const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
 320   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
 321   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
 322
 323   const auto stride = node.param().stride;
 324   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
 325   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
 326   // Kernel format is [1, kernel_height, kernel_width, depth_out].
 327   const auto &ker_shape = _ctx.at(ker_index).shape();
 328   const auto ker_height = ker_shape.dim(1);
 329   const auto ker_width = ker_shape.dim(2);
 330   const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
 331                                             ker_width, ker_height);
 332   const auto multiplier = node.param().multiplier;
 333   const auto activation = node.param().activation;
 334
 335   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 336   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 337   auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
 338   auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 339
 340   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
 341
 342   fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
 343                 padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
 344                 ofm_tensor);
 345
 346   _return_fn = std::move(fn);
 347 }
 348
 349 void KernelGenerator::visit(const ir::operation::Concat &node)
 350 {
 351   const auto ofm_index{node.getOutputs().at(0)};
 352
 353   const auto rank = _ctx.at(ofm_index).shape().rank();
 354   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 355
 356   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 357
 358   std::vector<const IPortableTensor *> input_tensors;
 359   for (auto &ifm_idx : node.getInputs())
 360     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 361
 362   auto fn = std::make_unique<ops::ConcatLayer>();
 363
 364   fn->configure(input_tensors, axis, output_tensor);
 365
 366   _return_fn = std::move(fn);
 367 }
 368
 369 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
 370 {
 371   const auto output_index{node.getOutputs().at(0)};
 372   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
 373   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
 374
 375   auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
 376   auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
 377   auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
 378
 379   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
 380
 381   IPortableTensor *crops_alloc = nullptr;
 382   const auto NNApiInputs = 2;
 383
 384   if (node.getInputs().size() != NNApiInputs)
 385   {
 386     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
 387     crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
 388   }
 389
 390   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
 391
 392   _return_fn = std::move(fn);
 393 }
 394
 395 void KernelGenerator::visit(const ir::operation::Fill &node)
 396 {
 397   const auto output_index{node.getOutputs().at(0)};
 398   const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
 399   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
 400
 401   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 402   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 403   auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
 404
 405   auto fn = std::make_unique<ops::FillLayer>();
 406
 407   fn->configure(input_tensor, value_tensor, output_tensor);
 408
 409   _return_fn = std::move(fn);
 410 }
 411
 412 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 413 {
 414   using ir::operation::FullyConnected;
 415
 416   const auto output_index{node.getOutputs().at(0)};
 417   const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
 418   const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
 419   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
 420   const auto activation = node.param().activation;
 421
 422   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 423   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 424   auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
 425   auto bias_tensor =
 426       bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
 427
 428   auto fn = std::make_unique<ops::FullyConnectedLayer>();
 429
 430   fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
 431                 _external_context);
 432
 433   _return_fn = std::move(fn);
 434 }
 435
 436 void KernelGenerator::visit(const ir::operation::Reshape &node)
 437 {
 438   const auto output_index{node.getOutputs().at(0)};
 439   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 440
 441   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 442   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 443
 444   // optional 2nd input
 445   IPortableTensor *shape_tensor = nullptr;
 446
 447   if (node.getInputs().size() == 2)
 448   {
 449     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
 450     shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
 451   }
 452
 453   auto fn = std::make_unique<ops::ReshapeLayer>();
 454
 455   fn->configure(input_tensor, shape_tensor, output_tensor);
 456   _return_fn = std::move(fn);
 457 }
 458
 459 void KernelGenerator::visit(const ir::operation::Squeeze &node)
 460 {
 461   const auto output_index{node.getOutputs().at(0)};
 462   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
 463
 464   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 465   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 466
 467   // Squeeze can share same kernel with reshape
 468   auto fn = std::make_unique<ops::ReshapeLayer>();
 469
 470   fn->configure(input_tensor, nullptr, output_tensor);
 471
 472   _return_fn = std::move(fn);
 473 }
 474
 475 void KernelGenerator::visit(const ir::operation::Softmax &node)
 476 {
 477   const auto output_index{node.getOutputs().at(0)};
 478   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
 479
 480   const auto beta = node.param().beta;
 481
 482   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 483   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 484
 485   auto fn = std::make_unique<ops::SoftMaxLayer>();
 486
 487   fn->configure(input_tensor, beta, output_tensor);
 488
 489   _return_fn = std::move(fn);
 490 }
 491
 492 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 493 {
 494   const auto ofm_index{node.getOutputs().at(0)};
 495   const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
 496   const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 497
 498   const auto activation = node.param().activation;
 499
 500   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 501   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
 502   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 503
 504   auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
 505
 506   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
 507                 convertArithmeticType(node.param().arithmetic_type));
 508
 509   _return_fn = std::move(fn);
 510 }
 511
 512 void KernelGenerator::visit(const ir::operation::Comparison &node)
 513 {
 514   const auto ofm_index{node.getOutputs().at(0)};
 515   const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
 516   const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
 517
 518   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 519   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
 520   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 521
 522   auto comparison_type = node.param().comparison_type;
 523
 524   auto fn = std::make_unique<ops::CompareLayer>();
 525
 526   fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
 527
 528   _return_fn = std::move(fn);
 529 }
 530
 531 void KernelGenerator::visit(const ir::operation::Gather &node)
 532 {
 533   const auto output_index{node.getOutputs().at(0)};
 534   const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
 535   const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
 536
 537   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 538   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 539   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
 540
 541   const auto backend_layout = output_tensor->layout();
 542   UNUSED_RELEASE(backend_layout);
 543
 544   // NOTE The frontend layout and backend layout must be the same for this operation.
 545   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
 546   //      is not not efficient even if it works well. If so, it would be better to set the
 547   //      layout of these backend tensors to the same layout.
 548   //      There is also one thing we have to think about. This operation depends on the layout of
 549   //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
 550   //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
 551   //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
 552   assert(backend_layout == input_tensor->layout());
 553   assert(backend_layout == indices_tensor->layout());
 554   const auto &input_shape = _ctx.at(input_index).shape();
 555   UNUSED_RELEASE(input_shape);
 556   assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
 557
 558   const auto axis_raw = node.param().axis;
 559   const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
 560
 561   auto fn = std::make_unique<ops::GatherLayer>();
 562
 563   fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
 564
 565   _return_fn = std::move(fn);
 566 }
 567
 568 void KernelGenerator::visit(const ir::operation::OneHot &node)
 569 {
 570   const auto output_index{node.getOutputs().at(0)};
 571   const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
 572   const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
 573   const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
 574   const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
 575
 576   const auto axis = node.param().axis;
 577
 578   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 579   auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
 580   auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
 581   auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
 582   auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
 583
 584   assert(indices_tensor->data_type() == OperandType::INT32);
 585   assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
 586
 587   auto fn = std::make_unique<ops::OneHotLayer>();
 588
 589   fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
 590
 591   _return_fn = std::move(fn);
 592 }
 593
 594 void KernelGenerator::visit(const ir::operation::Einsum &node)
 595 {
 596   const auto ofm_index{node.getOutputs().at(0)};
 597
 598   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 599   std::vector<const IPortableTensor *> input_tensors;
 600   for (auto &ifm_idx : node.getInputs())
 601     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 602
 603   const auto equation = node.param().equation;
 604
 605   auto fn = std::make_unique<ops::EinsumLayer>();
 606
 607   fn->configure(input_tensors, equation, output_tensor);
 608
 609   _return_fn = std::move(fn);
 610 }
 611
 612 void KernelGenerator::visit(const ir::operation::Custom &node)
 613 {
 614   auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
 615                           std::vector<custom::TypeInfo> &types,
 616                           std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
 617     for (auto &idx : opSeq)
 618     {
 619       const auto &operand = _ctx.at(idx);
 620       // TODO make sure using `_current_op_seq_layout` is correct for custom operations
 621       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
 622       auto in_tensor = _tensor_reg->getPortableTensor(idx);
 623       tensors.emplace_back(in_tensor);
 624     }
 625   };
 626
 627   backend::custom::CustomKernelConfigParams params{};
 628
 629   fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
 630   fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
 631
 632   params.userdata = node.userdata().data;
 633   params.userdata_size = node.userdata().size;
 634
 635   auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
 636
 637   _return_fn = std::move(fn);
 638 }
 639
 640 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 641 {
 642   const auto output_index{node.getOutputs().at(0)};
 643   const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
 644
 645   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 646   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 647
 648   auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
 649
 650   fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
 651                 convertElementwiseActivationType(node.param().op_type));
 652
 653   _return_fn = std::move(fn);
 654 }
 655
 656 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
 657 {
 658   const auto output_index{node.getOutputs().at(0)};
 659   const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
 660   const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
 661
 662   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 663   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
 664   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 665
 666   auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
 667
 668   fn->configure(lhs_tensor, rhs_tensor, output_tensor,
 669                 convertElementwiseBinaryType(node.param().op_type));
 670
 671   _return_fn = std::move(fn);
 672 }
 673
 674 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 675 {
 676   const auto output_index{node.getOutputs().at(0)};
 677   const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
 678
 679   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 680   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 681
 682   auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
 683
 684   fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
 685
 686   _return_fn = std::move(fn);
 687 }
 688
 689 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
 690 {
 691   const auto output_index{node.getOutputs().at(0)};
 692   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 693   const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
 694
 695   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 696   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 697   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
 698
 699   auto fn = std::make_unique<ops::ExpandDimsLayer>();
 700
 701   fn->configure(input_tensor, axis_tensor, output_tensor);
 702
 703   _return_fn = std::move(fn);
 704 }
 705
 706 void KernelGenerator::visit(const ir::operation::Pack &node)
 707 {
 708   const auto ofm_index{node.getOutputs().at(0)};
 709
 710   const auto rank = _ctx.at(ofm_index).shape().rank();
 711   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 712
 713   assert(-rank <= axis && axis < rank);
 714
 715   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 716
 717   std::vector<const IPortableTensor *> input_tensors;
 718   for (auto &ifm_idx : node.getInputs())
 719     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 720
 721   auto fn = std::make_unique<ops::PackLayer>();
 722
 723   fn->configure(input_tensors, axis, output_tensor);
 724
 725   _return_fn = std::move(fn);
 726 }
 727
 728 void KernelGenerator::visit(const ir::operation::Unpack &node)
 729 {
 730   const auto input_index{node.getInputs().at(0)};
 731
 732   const auto rank = _ctx.at(input_index).shape().rank();
 733   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 734
 735   assert(rank == 0 || (-rank <= axis && axis < rank));
 736
 737   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 738
 739   std::vector<IPortableTensor *> output_tensors;
 740   for (auto &output_idx : node.getOutputs())
 741     output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
 742
 743   auto fn = std::make_unique<ops::UnpackLayer>();
 744
 745   uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
 746
 747   fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
 748
 749   _return_fn = std::move(fn);
 750 }
 751
 752 void KernelGenerator::visit(const ir::operation::Pad &node)
 753 {
 754   const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
 755   const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
 756   const auto output_index{node.getOutputs().at(0)};
 757   assert(_ctx.at(pad_index).data());
 758
 759   auto input = _tensor_reg->getPortableTensor(input_index).get();
 760   auto output = _tensor_reg->getPortableTensor(output_index).get();
 761   auto pad_rank = _ctx.at(pad_index).shape().dim(0);
 762   auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
 763
 764   auto fn = std::make_unique<ops::PadLayer>();
 765
 766   bool isPadV2 = node.getInputs().size() == 3 ? true : false;
 767   const void *value = nullptr;
 768
 769   if (isPadV2)
 770   {
 771     const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
 772     value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
 773   }
 774
 775   fn->configure(input, output, pad_base, pad_rank, value);
 776   _return_fn = std::move(fn);
 777 }
 778
 779 void KernelGenerator::visit(const ir::operation::Transpose &node)
 780 {
 781   const auto output_index{node.getOutputs().at(0)};
 782   const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
 783
 784   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 785   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 786
 787   auto fn = std::make_unique<ops::TransposeLayer>();
 788
 789   fn->configure(input_tensor, output_tensor, node.param().perm);
 790
 791   _return_fn = std::move(fn);
 792 }
 793
 794 void KernelGenerator::visit(const ir::operation::Reduce &node)
 795 {
 796   const auto output_index{node.getOutputs().at(0)};
 797   const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
 798   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
 799
 800   const auto keep_dims = node.param().keep_dims;
 801   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 802   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 803   auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
 804
 805   if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
 806   {
 807     auto fn = std::make_unique<ops::MeanLayer>();
 808
 809     fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
 810
 811     _return_fn = std::move(fn);
 812   }
 813   else
 814   {
 815     auto fn = std::make_unique<ops::ReduceLayer>();
 816
 817     const auto reduce_type = convertReduceType(node.param().reduce_type);
 818     fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
 819
 820     _return_fn = std::move(fn);
 821   }
 822 }
 823
 824 void KernelGenerator::visit(const ir::operation::Select &node)
 825 {
 826   const auto output_index{node.getOutputs().at(0)};
 827   const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
 828   const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
 829   const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
 830
 831   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 832   auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
 833   auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
 834   auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
 835
 836   auto fn = std::make_unique<ops::SelectLayer>();
 837
 838   fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
 839
 840   _return_fn = std::move(fn);
 841 }
 842
 843 void KernelGenerator::visit(const ir::operation::Slice &node)
 844 {
 845   const auto output_index{node.getOutputs().at(0)};
 846   const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
 847   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
 848   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 849
 850   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 851   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 852   auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
 853   auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
 854
 855   auto fn = std::make_unique<ops::SliceLayer>();
 856
 857   fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
 858
 859   _return_fn = std::move(fn);
 860 }
 861
 862 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
 863 {
 864   const auto output_index{node.getOutputs().at(0)};
 865   const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
 866   const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
 867   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
 868   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 869
 870   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 871   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 872   auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
 873   auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
 874   auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
 875
 876   auto begin_mask = node.param().begin_mask;
 877   auto end_mask = node.param().end_mask;
 878   auto shrink_axis_mask = node.param().shrink_axis_mask;
 879
 880   auto fn = std::make_unique<ops::StridedSliceLayer>();
 881
 882   fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
 883                 end_mask, shrink_axis_mask);
 884
 885   _return_fn = std::move(fn);
 886 }
 887
 888 void KernelGenerator::visit(const ir::operation::Split &node)
 889 {
 890   const auto num_splits = node.param().num_splits;
 891   assert(num_splits == static_cast<int>(node.getOutputs().size()));
 892
 893   const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
 894   const auto rank = _ctx.at(input_idx).shape().rank();
 895   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 896   auto axis_resolved = axis < 0 ? axis + rank : axis;
 897
 898   auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
 899
 900   std::vector<IPortableTensor *> out_tensors;
 901   for (auto &output_idx : node.getOutputs())
 902     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
 903
 904   auto fn = std::make_unique<ops::SplitLayer>();
 905
 906   fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
 907
 908   _return_fn = std::move(fn);
 909 }
 910
 911 void KernelGenerator::visit(const ir::operation::Shape &node)
 912 {
 913   const auto ofm_index{node.getOutputs().at(0)};
 914   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
 915
 916   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 917   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 918
 919   auto fn = std::make_unique<ops::ShapeLayer>();
 920
 921   fn->configure(ifm_tensor, ofm_tensor);
 922
 923   _return_fn = std::move(fn);
 924 }
 925
 926 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
 927 {
 928   const auto output_index{node.getOutputs().at(0)};
 929   const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
 930
 931   auto output_height = node.param().height_out;
 932   auto output_width = node.param().width_out;
 933   auto align_corners = node.param().align_corners;
 934   auto half_pixel_centers = node.param().half_pixel_centers;
 935
 936   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 937   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 938
 939   auto fn = std::make_unique<ops::ResizeBilinearLayer>();
 940
 941   fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
 942                 half_pixel_centers);
 943
 944   _return_fn = std::move(fn);
 945 }
 946
 947 void KernelGenerator::visit(const ir::operation::Reverse &node)
 948 {
 949   const auto output_index{node.getOutputs().at(0)};
 950   const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
 951   const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
 952
 953   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 954   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 955   auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
 956
 957   auto fn = std::make_unique<ops::ReverseLayer>();
 958
 959   fn->configure(input_tensor, axis_tensor, output_tensor);
 960
 961   _return_fn = std::move(fn);
 962 }
 963
 964 void KernelGenerator::visit(const ir::operation::ArgMax &node)
 965 {
 966   const auto output_index{node.getOutputs().at(0)};
 967   const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
 968
 969   const auto axis = node.param().axis;
 970
 971   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 972   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 973
 974   auto fn = std::make_unique<ops::ArgMinMaxLayer>();
 975
 976   fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
 977
 978   _return_fn = std::move(fn);
 979 }
 980
 981 void KernelGenerator::visit(const ir::operation::Pool2D &node)
 982 {
 983   const auto ofm_index{node.getOutputs().at(0)};
 984   const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
 985
 986   const auto kh = node.param().kh;
 987   const auto kw = node.param().kw;
 988   const auto stride = node.param().stride;
 989   const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
 990   const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
 991   const auto padding =
 992       ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
 993   const auto activation = node.param().activation;
 994
 995   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 996   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 997
 998   auto fn = std::make_unique<ops::PoolLayer>();
 999
1000   fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1001                 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1002                 convertPoolType(node.param().op_type));
1003
1004   _return_fn = std::move(fn);
1005 }
1006
1007 void KernelGenerator::visit(const ir::operation::Pow &node)
1008 {
1009   const auto output_index{node.getOutputs().at(0)};
1010   const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1011   const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1012
1013   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1014   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
1015   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
1016
1017   auto fn = std::make_unique<ops::PowLayer>();
1018
1019   fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1020
1021   _return_fn = std::move(fn);
1022 }
1023
1024 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1025 {
1026   const auto output_index{node.getOutputs().at(0)};
1027   const auto input_index{node.getInputs().at(0)};
1028
1029   auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
1030   auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
1031
1032   auto fn = std::make_unique<ops::L2NormLayer>();
1033
1034   fn->configure(input_alloc, output_alloc);
1035
1036   _return_fn = std::move(fn);
1037 }
1038
1039 void KernelGenerator::visit(const ir::operation::Range &node)
1040 {
1041   const auto output_index{node.getOutputs().at(0)};
1042   const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1043   const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1044   const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1045
1046   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1047   auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
1048   auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
1049   auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
1050
1051   auto fn = std::make_unique<ops::RangeLayer>();
1052
1053   fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1054   _return_fn = std::move(fn);
1055 }
1056
1057 void KernelGenerator::visit(const ir::operation::Rank &node)
1058 {
1059   const auto ofm_index{node.getOutputs().at(0)};
1060   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1061
1062   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
1063   auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
1064
1065   auto fn = std::make_unique<ops::RankLayer>();
1066
1067   fn->configure(ifm_tensor, ofm_tensor);
1068
1069   _return_fn = std::move(fn);
1070 }
1071
1072 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1073 {
1074   const auto ofm_index{node.getOutputs().at(0)};
1075   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1076   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1077
1078   auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
1079   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
1080   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
1081
1082   auto fn = std::make_unique<ops::SqDiffLayer>();
1083
1084   fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1085   _return_fn = std::move(fn);
1086 }
1087
1088 void KernelGenerator::visit(const ir::operation::Tile &node)
1089 {
1090   const auto output_index{node.getOutputs().at(0)};
1091   const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1092   const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1093
1094   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1095   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1096   auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
1097
1098   auto fn = std::make_unique<ops::TileLayer>();
1099
1100   fn->configure(input_tensor, multiples_tensor, output_tensor);
1101   _return_fn = std::move(fn);
1102 }
1103
1104 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1105 {
1106   const auto output_index{node.getOutputs().at(0)};
1107   const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1108   const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1109   const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1110
1111   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1112   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1113   auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
1114   auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
1115
1116   auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1117
1118   fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1119   _return_fn = std::move(fn);
1120 }
1121
1122 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1123 {
1124   const auto output_index{node.getOutputs().at(0)};
1125   const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1126   const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1127
1128   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1129   auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
1130   auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
1131
1132   const auto adj_x = node.param().adj_x;
1133   const auto adj_y = node.param().adj_y;
1134
1135   auto fn = std::make_unique<ops::BatchMatMulLayer>();
1136
1137   fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1138   _return_fn = std::move(fn);
1139 }
1140
1141 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1142 {
1143   const auto output_index{node.getOutputs().at(0)};
1144   const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1145   const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1146
1147   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1148   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1149   auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
1150
1151   auto fn = std::make_unique<ops::BroadcastToLayer>();
1152
1153   fn->configure(input_tensor, shape_tensor, output_tensor);
1154
1155   _return_fn = std::move(fn);
1156 }
1157
1158 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1159 {
1160   const auto ofm_index{node.getOutputs().at(0)};
1161
1162   auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
1163   std::vector<const IPortableTensor *> input_tensors;
1164   for (auto &ifm_idx : node.getInputs())
1165     input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
1166
1167   const auto epsilon = node.param().epsilon;
1168   const auto is_training = node.param().is_training;
1169   const auto data_format = node.param().data_format;
1170
1171   auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1172
1173   fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1174
1175   _return_fn = std::move(fn);
1176 }
1177
1178 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1179 {
1180   const auto output_index{node.getOutputs().at(0)};
1181   const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1182
1183   const auto beta = node.param().beta;
1184   const auto axis = node.param().axis;
1185
1186   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1187   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1188
1189   auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1190
1191   fn->configure(input_tensor, beta, axis, output_tensor);
1192
1193   _return_fn = std::move(fn);
1194 }
1195
1196 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1197 {
1198   const auto output_index{node.getOutputs().at(0)};
1199   const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1200   const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1201   const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1202
1203   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1204   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1205   auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
1206   auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
1207
1208   auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1209
1210   fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1211
1212   _return_fn = std::move(fn);
1213 }
1214
1215 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1216 {
1217   const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1218   const auto output_index{node.getOutputs().at(0)};
1219   auto block_size = node.param().block_size;
1220
1221   auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1222   auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1223
1224   auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1225
1226   fn->configure(input_tensor, block_size, output_tensor);
1227   _return_fn = std::move(fn);
1228 }
1229
1230 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1231 {
1232   const auto output_index{node.getOutputs().at(0)};
1233   const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1234   const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1235
1236   auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
1237   auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
1238   auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
1239
1240   auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1241
1242   fn->configure(shape_alloc, seed_alloc, output_alloc);
1243   _return_fn = std::move(fn);
1244 }
1245
1246 void KernelGenerator::visit(const ir::operation::SplitV &node)
1247 {
1248   const auto num_splits = node.param().num_splits;
1249   assert(num_splits == static_cast<int>(node.getOutputs().size()));
1250
1251   const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1252   const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1253   const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1254
1255   auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
1256   auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
1257   auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
1258
1259   std::vector<IPortableTensor *> out_tensors;
1260   for (auto &output_idx : node.getOutputs())
1261     out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
1262
1263   auto fn = std::make_unique<ops::SplitVLayer>();
1264
1265   fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1266
1267   _return_fn = std::move(fn);
1268 }
1269
1270 } // namespace cpu
1271 } // namespace backend
1272 } // namespace onert