/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "KernelGenerator.h"

#include "ops/AddNLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
#include "ops/DepthToSpaceLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/EinsumLayer.h"
#include "ops/ElementwiseActivationLayer.h"
#include "ops/ElementwiseBinaryLayer.h"
#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
#include "ops/StatelessRandomUniformLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>

#include <util/Utils.h>
#include <util/logging.h>
#include <exec/DynamicShapeInferer.h>

namespace onert
{
namespace backend
{
namespace cpu
{

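// The helpers in the anonymous namespace below map IR-level enum values onto
// the enum values used by the CPU kernels in `ops/`. Each converter covers
// only the values this backend currently supports and throws
// std::runtime_error for anything else, so an unsupported model fails at
// kernel-generation time rather than mid-inference.
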
namespace
{
ops::ArithmeticType
convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
{
  switch (arithmetic_type_ir)
  {
    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
      return ops::ArithmeticType::kAdd;
    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
      return ops::ArithmeticType::kSub;
    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
      return ops::ArithmeticType::kMul;
    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
      return ops::ArithmeticType::kDiv;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseActivationType
convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseActivation::Type::ELU:
      return ops::ElementwiseActivationType::kElu;
    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
      return ops::ElementwiseActivationType::kLogistic;
    case ir::operation::ElementwiseActivation::Type::RELU:
      return ops::ElementwiseActivationType::kReLU;
    case ir::operation::ElementwiseActivation::Type::TANH:
      return ops::ElementwiseActivationType::kTanh;
    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
      return ops::ElementwiseActivationType::kLeakyReLU;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseBinaryType
convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
      return ops::ElementwiseBinaryType::kLogicalAnd;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
      return ops::ElementwiseBinaryType::kLogicalOr;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
      return ops::ElementwiseBinaryType::kMax;
    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
      return ops::ElementwiseBinaryType::kMin;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
{
  switch (type_ir)
  {
    case ir::operation::ElementwiseUnary::Type::ABS:
      return ops::ElementwiseUnaryType::kAbs;
    case ir::operation::ElementwiseUnary::Type::CAST:
      return ops::ElementwiseUnaryType::kCast;
    case ir::operation::ElementwiseUnary::Type::COS:
      return ops::ElementwiseUnaryType::kCos;
    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
      return ops::ElementwiseUnaryType::kDequantize;
    case ir::operation::ElementwiseUnary::Type::ERF:
      return ops::ElementwiseUnaryType::kErf;
    case ir::operation::ElementwiseUnary::Type::EXP:
      return ops::ElementwiseUnaryType::kExp;
    case ir::operation::ElementwiseUnary::Type::FLOOR:
      return ops::ElementwiseUnaryType::kFloor;
    case ir::operation::ElementwiseUnary::Type::LOG:
      return ops::ElementwiseUnaryType::kLog;
    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
      return ops::ElementwiseUnaryType::kLogicalNot;
    case ir::operation::ElementwiseUnary::Type::NEG:
      return ops::ElementwiseUnaryType::kNeg;
    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
      return ops::ElementwiseUnaryType::kQuantize;
    case ir::operation::ElementwiseUnary::Type::ROUND:
      return ops::ElementwiseUnaryType::kRound;
    case ir::operation::ElementwiseUnary::Type::RSQRT:
      return ops::ElementwiseUnaryType::kRSqrt;
    case ir::operation::ElementwiseUnary::Type::SIN:
      return ops::ElementwiseUnaryType::kSin;
    case ir::operation::ElementwiseUnary::Type::SQRT:
      return ops::ElementwiseUnaryType::kSqrt;
    case ir::operation::ElementwiseUnary::Type::SQUARE:
      return ops::ElementwiseUnaryType::kSquare;
    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
      return ops::ElementwiseUnaryType::kZerosLike;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ops::PoolType::kAvg;
    case ir::operation::Pool2D::PoolType::MAX:
      return ops::PoolType::kMax;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}

ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
  {
    case ir::operation::Reduce::ReduceType::ALL:
      return ops::ReduceType::kAll;
    case ir::operation::Reduce::ReduceType::ANY:
      return ops::ReduceType::kAny;
    case ir::operation::Reduce::ReduceType::MAX:
      return ops::ReduceType::kMax;
    case ir::operation::Reduce::ReduceType::MIN:
      return ops::ReduceType::kMin;
    case ir::operation::Reduce::ReduceType::PROD:
      return ops::ReduceType::kProd;
    case ir::operation::Reduce::ReduceType::SUM:
      return ops::ReduceType::kSum;
    default:
      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
  }
}
} // namespace

KernelGenerator::KernelGenerator(
    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
    const std::shared_ptr<TensorBuilder> &tensor_builder,
    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
    const std::shared_ptr<ExternalContext> &external_context)
    : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
      _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
  // DO NOTHING
}

void KernelGenerator::visit(const ir::operation::AddN &node)
{
  const auto output_index{node.getOutputs().at(0)};

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &input_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::AddNLayer>();

  fn->configure(std::move(input_tensors), output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
  assert(!_return_fn_seq);
  assert(_tensor_builder->dynamicTensorManager());

  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);

  _return_fn_seq = std::make_unique<exec::FunctionSequence>();

  // Prepare to handle dynamic tensors later
  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
  {
    dyn_ctx->op_seq = &op_seq;
    dyn_ctx->operations = &_operations_ctx;
    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();

    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
  }

  _current_layout = op_seq.getLayout();
  for (const auto &operation_idx : op_seq.operations())
  {
    const auto &node = _operations_ctx.at(operation_idx);
    node.accept(*this);
    _return_fn_seq->append(releaseFunction());

    for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
    {
      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
      if (portable_tensor)
      {
        assert(portable_tensor->layout() == ir::Layout::NHWC);
      }

      auto tensor = _tensor_reg->getNativeTensor(ind);
      if (tensor)
      {
        tensor->increase_ref();
      }
    }
  }
}

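// In short, for every operation in the sequence the flow above is:
//   1. node.accept(*this)          -> the matching visit() below fills _return_fn
//   2. releaseFunction()           -> hands _return_fn over as a runnable function
//   3. _return_fn_seq->append(...) -> queues it for execution in model order
// and every native tensor touched by the node gets its reference count bumped,
// so the underlying buffer stays alive for all kernels that use it.
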
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto param_padding = node.param().padding;
  const auto dilation = node.param().dilation;
  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    // Shapes are unknown at this point, so padding cannot be precomputed;
    // pass the raw padding parameters and let the kernel resolve them at run time.
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
                  activation, ofm_tensor);

    _return_fn = std::move(fn);
    return;
  }
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
                           dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);

  _return_fn = std::move(fn);
}

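// Worked example for the static-shape path above (a sketch; the exact split of
// odd padding between the two sides is defined by ir::calculatePadding): with
// SAME padding, input height 224, stride 2, kernel 3, dilation 1, the output
// height is ceil(224 / 2) = 112 and the total vertical padding is
// max((112 - 1) * 2 + 3 - 224, 0) = 1, typically split as top = 0, bottom = 1.
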
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);
  const auto dilation_width = node.param().dilation.width_factor;
  const auto dilation_height = node.param().dilation.height_factor;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height, dilation_width, dilation_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
                padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
                dilation_height, activation, ofm_tensor, _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::ConcatLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
  const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);
  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);

  auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();

  IPortableTensor *crops_alloc = nullptr;
  const auto NNApiInputs = 2;

  if (node.getInputs().size() != NNApiInputs)
  {
    const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
  }

  fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);

  _return_fn = std::move(fn);
}

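// Note: the 2-input form matches the NNAPI BATCH_TO_SPACE_ND signature, which
// carries no crops operand; a null crops tensor therefore signals the kernel
// to treat cropping as zero. The 3-input (TensorFlow-style) form passes
// explicit crops data through.
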
void KernelGenerator::visit(const ir::operation::Fill &node)
{
  const auto output_index{node.getOutputs().at(0)};
  // SHAPE input is used for shape inference
  const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto value_tensor = _tensor_reg->getPortableTensor(value_index);

  auto fn = std::make_unique<ops::FillLayer>();

  fn->configure(value_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
  const auto activation = node.param().activation;
  const auto weights_format = node.param().weights_format;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>();

  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
                _external_context);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // optional 2nd input
  IPortableTensor *shape_tensor = nullptr;

  if (node.getInputs().size() == 2)
  {
    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
    shape_tensor = _tensor_reg->getPortableTensor(shape_index);
  }

  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  // Squeeze can share the same kernel with Reshape
  auto fn = std::make_unique<ops::ReshapeLayer>();

  fn->configure(input_tensor, nullptr, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};

  const auto beta = node.param().beta;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::SoftMaxLayer>();

  fn->configure(input_tensor, beta, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
                convertArithmeticType(node.param().arithmetic_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto comparison_type = node.param().comparison_type;

  auto fn = std::make_unique<ops::CompareLayer>();

  fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);

  const auto backend_layout = output_tensor->layout();
  UNUSED_RELEASE(backend_layout);

  // NOTE The frontend layout and backend layout must be the same for this operation.
  //      If not the same, we have to add a stage(?) to perform permutation of the output
  //      tensor. It is not efficient even if it works well. If so, it would be better to
  //      set the layout of these backend tensors to the same layout.
  //      There is also one thing we have to think about. This operation depends on the layout of
  //      a model. For example, if a model in NHWC has this operation with output rank == 4,
  //      indices rank == 2 and axis == 2, this operation should work on the axes W and C, but
  //      the axes W and C are not sequential in NCHW. So a backend in NCHW cannot handle this
  //      case.
  assert(backend_layout == input_tensor->layout());
  assert(backend_layout == indices_tensor->layout());
  const auto &input_shape = _ctx.at(input_index).shape();
  UNUSED_RELEASE(input_shape);
  assert(input_shape.rank() < 4 || _current_layout == backend_layout);

  const auto axis_raw = node.param().axis;
  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);

  auto fn = std::make_unique<ops::GatherLayer>();

  fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);

  _return_fn = std::move(fn);
}

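// Example of the axis normalization above: for an input of rank 4,
// axis_raw == -1 resolves to axis_value == 3 (the last dimension), matching
// the usual negative-axis convention of TensorFlow/NNAPI gather.
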
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
  const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};

  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);

  assert(indices_tensor->data_type() == OperandType::INT32);
  assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));

  auto fn = std::make_unique<ops::OneHotLayer>();

  fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Einsum &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto equation = node.param().equation;

  auto fn = std::make_unique<ops::EinsumLayer>();

  fn->configure(input_tensors, equation, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Custom &node)
{
  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
                          std::vector<custom::TypeInfo> &types,
                          std::vector<IPortableTensor *> &tensors) {
    for (auto &idx : opSeq)
    {
      const auto &operand = _ctx.at(idx);
      // TODO make sure using `_current_layout` is correct for custom operations
      types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
      auto in_tensor = _tensor_reg->getPortableTensor(idx);
      tensors.emplace_back(in_tensor);
    }
  };

  backend::custom::CustomKernelConfigParams params{};

  fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
  fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);

  params.userdata = node.userdata().data;
  params.userdata_size = node.userdata().size;

  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));

  _return_fn = std::move(fn);
}

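// Unlike the built-in visitors, there is no ops/*Layer kernel for Custom
// nodes: the backend delegates to the user-provided IKernelBuilder, which is
// expected to resolve the operation's string id to a registered kernel factory
// and construct the kernel from the type/tensor info gathered above.
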
void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();

  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
                convertElementwiseActivationType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();

  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
                convertElementwiseBinaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();

  fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
  // AXIS input is used for output shape inference

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ExpandDimsLayer>();

  fn->configure(input_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto rank = _ctx.at(ofm_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  assert(-rank <= axis && axis < rank);

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);

  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  auto fn = std::make_unique<ops::PackLayer>();

  fn->configure(input_tensors, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(0)};

  const auto rank = _ctx.at(input_index).shape().rank();
  const auto axis = ops::getAxis(rank, node.param().axis, _current_layout);

  assert(rank == 0 || (-rank <= axis && axis < rank));

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  std::vector<IPortableTensor *> output_tensors;
  for (auto &output_idx : node.getOutputs())
    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::UnpackLayer>();

  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);

  fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  assert(_ctx.at(pad_index).data());

  auto input = _tensor_reg->getPortableTensor(input_index);
  auto output = _tensor_reg->getPortableTensor(output_index);
  auto pad_rank = _ctx.at(pad_index).shape().dim(0);
  auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());

  auto fn = std::make_unique<ops::PadLayer>();

  const bool isPadV2 = node.getInputs().size() == 3;
  const void *value = nullptr;

  if (isPadV2)
  {
    const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
    value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
  }

  fn->configure(input, output, pad_base, pad_rank, value);
  _return_fn = std::move(fn);
}

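// Note: `value` is handed over as a type-erased pointer into the constant
// operand's buffer; a null value (plain Pad) is presumably interpreted by
// PadLayer as padding with zeros, while the PadV2 form pads with the given
// constant reinterpreted according to the input's data type.
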
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);

  auto fn = std::make_unique<ops::TransposeLayer>();

  fn->configure(input_tensor, perm_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  const auto keep_dims = node.param().keep_dims;
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);

  if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto fn = std::make_unique<ops::MeanLayer>();

    fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);

    _return_fn = std::move(fn);
  }
  else
  {
    auto fn = std::make_unique<ops::ReduceLayer>();

    const auto reduce_type = convertReduceType(node.param().reduce_type);
    fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);

    _return_fn = std::move(fn);
  }
}

void KernelGenerator::visit(const ir::operation::Select &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
  const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
  const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
  auto true_tensor = _tensor_reg->getPortableTensor(true_index);
  auto false_tensor = _tensor_reg->getPortableTensor(false_index);

  auto fn = std::make_unique<ops::SelectLayer>();

  fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);

  auto fn = std::make_unique<ops::SliceLayer>();

  fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);

  auto begin_mask = node.param().begin_mask;
  auto end_mask = node.param().end_mask;
  auto shrink_axis_mask = node.param().shrink_axis_mask;

  auto fn = std::make_unique<ops::StridedSliceLayer>();

  fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
                end_mask, shrink_axis_mask);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Split &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitLayer>();

  fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Shape &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::ShapeLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};

  auto align_corners = node.param().align_corners;
  auto half_pixel_centers = node.param().half_pixel_centers;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::ResizeBilinearLayer>();

  if (node.getInputs().size() == 1)
  {
    fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
                  align_corners, half_pixel_centers);
  }
  else
  {
    assert(node.getInputs().size() == 2);
    const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
    auto size_tensor = _tensor_reg->getPortableTensor(size_index);
    if (size_tensor->is_constant())
    {
      auto size_vec = _ctx.at(size_index).asVector<int32_t>();
      const auto height_out = size_vec[0];
      const auto width_out = size_vec[1];
      fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
                    half_pixel_centers);
    }
    else
    {
      fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
    }
  }

  _return_fn = std::move(fn);
}

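// Design note: when SIZE is a constant operand, its two elements are folded
// into scalar output height/width at kernel-generation time, so the kernel
// never re-reads the tensor during execution; only a genuinely dynamic size
// tensor is passed through to be read at run time.
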
void KernelGenerator::visit(const ir::operation::Reverse &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ReverseLayer>();

  fn->configure(input_tensor, axis_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)};
  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);

  auto fn = std::make_unique<ops::ArgMinMaxLayer>();

  fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
  const auto padding =
      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::PoolLayer>();

  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
                convertPoolType(node.param().op_type));

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Pow &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::PowLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto input_alloc = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::L2NormLayer>();

  fn->configure(input_alloc, output_alloc);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Range &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto start_tensor = _tensor_reg->getPortableTensor(start_index);
  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);

  auto fn = std::make_unique<ops::RangeLayer>();

  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Rank &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);

  auto fn = std::make_unique<ops::RankLayer>();

  fn->configure(ifm_tensor, ofm_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  auto fn = std::make_unique<ops::SqDiffLayer>();

  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::Tile &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
  const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);

  auto fn = std::make_unique<ops::TileLayer>();

  fn->configure(input_tensor, multiples_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
  const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
  const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);

  auto fn = std::make_unique<ops::MatrixBandPartLayer>();

  fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);

  const auto adj_x = node.param().adj_x;
  const auto adj_y = node.param().adj_y;

  auto fn = std::make_unique<ops::BatchMatMulLayer>();

  fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
  const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);

  auto fn = std::make_unique<ops::BroadcastToLayer>();

  fn->configure(input_tensor, shape_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
  std::vector<const IPortableTensor *> input_tensors;
  for (auto &ifm_idx : node.getInputs())
    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));

  const auto epsilon = node.param().epsilon;
  const auto is_training = node.param().is_training;
  const auto data_format = node.param().data_format;

  auto fn = std::make_unique<ops::FusedBatchNormLayer>();

  fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};

  const auto beta = node.param().beta;
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto fn = std::make_unique<ops::LogSoftMaxLayer>();

  fn->configure(input_tensor, beta, axis, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
  const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
  const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);

  auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();

  fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::DepthToSpaceLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
  const auto output_index{node.getOutputs().at(0)};
  auto block_size = node.param().block_size;

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto fn = std::make_unique<ops::SpaceToDepthLayer>();

  fn->configure(input_tensor, block_size, output_tensor);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
  const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};

  auto output_alloc = _tensor_reg->getPortableTensor(output_index);
  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);

  auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();

  fn->configure(shape_alloc, seed_alloc, output_alloc);
  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::SplitV &node)
{
  const auto num_splits = node.param().num_splits;
  assert(num_splits == static_cast<int>(node.getOutputs().size()));

  const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
  const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
  const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};

  auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);

  std::vector<IPortableTensor *> out_tensors;
  for (auto &output_idx : node.getOutputs())
    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));

  auto fn = std::make_unique<ops::SplitVLayer>();

  fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);

  _return_fn = std::move(fn);
}

void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  const auto scratch_buffer_index{
      node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
  // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
  // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
  bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
                                    (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
  bool has_recurrent_to_input_weights =
      _ctx.exist(recurrent_to_input_weights_index) &&
      (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
       _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);

  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
  // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
  // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
  bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
                                    _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
                                    _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;

  bool has_input_gate_bias =
      _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);

  bool has_projection_weights = _ctx.exist(projection_weights_index) &&
                                (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
                                 _ctx.at(projection_weights_index).shape().dim(1) != 0);
  bool has_projection_bias =
      _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);

  auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
                                   ? _tensor_reg->getPortableTensor(scratch_buffer_index)
                                   : nullptr; // optional
  auto output_state_out_tensor = _ctx.exist(output_state_out_index)
                                     ? _tensor_reg->getPortableTensor(output_state_out_index)
                                     : nullptr; // optional
  auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
                                   ? _tensor_reg->getPortableTensor(cell_state_out_index)
                                   : nullptr; // optional
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto input_to_input_weights_tensor =
      has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
                                 : nullptr; // optional
  auto input_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_output_weights_index);
  auto recurrent_to_input_weights_tensor =
      has_recurrent_to_input_weights
          ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
          : nullptr; // optional
  auto recurrent_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);

  auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
  auto cell_to_forget_weights_tensor =
      has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
                                 : nullptr; // optional
  auto cell_to_output_weights_tensor =
      has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
                                 : nullptr; // optional

  auto input_gate_bias_tensor =
      has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
  auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
  auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
  auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
  auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
  auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);

  auto projection_weights_tensor = has_projection_weights
                                       ? _tensor_reg->getPortableTensor(projection_weights_index)
                                       : nullptr; // optional
  auto projection_bias_tensor = has_projection_bias
                                    ? _tensor_reg->getPortableTensor(projection_bias_index)
                                    : nullptr; // optional

  IPortableTensor *input_layer_norm_weights_tensor = nullptr;
  IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
  IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
  IPortableTensor *output_layer_norm_weights_tensor = nullptr;
  if (node.getInputs().size() == 24)
  {
    const auto input_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
    const auto forget_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
    const auto cell_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
    const auto output_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};

    input_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
    forget_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
    cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
    output_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
  }

  auto fn = std::make_unique<ops::LSTMLayer>();

  fn->configure(
      input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
      input_to_cell_weights_tensor, input_to_output_weights_tensor,
      recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
      recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
      cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
      input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
      cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
      /*aux_input=*/nullptr,
      /*aux_input_to_input_weights=*/nullptr,
      /*aux_input_to_forget_weights=*/nullptr,
      /*aux_input_to_cell_weights=*/nullptr,
      /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
      cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
      projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
      /*forward_sequence=*/true, time_major,
      /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
      output_tensor,
      !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
      !_ctx.at(cell_state_in_index).info().isVariable());

  _return_fn = std::move(fn);
}

} // namespace cpu
} // namespace backend
} // namespace onert