2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "KernelGenerator.h"
19 #include "ops/ArgMinMaxLayer.h"
20 #include "ops/BatchToSpaceNDLayer.h"
21 #include "ops/BinaryArithmeticLayer.h"
22 #include "ops/CompareLayer.h"
23 #include "ops/ConcatLayer.h"
24 #include "ops/ConvolutionLayer.h"
25 #include "ops/DepthwiseConvolutionLayer.h"
26 #include "ops/EinsumLayer.h"
27 #include "ops/ElementwiseActivationLayer.h"
28 #include "ops/ElementwiseBinaryLayer.h"
29 #include "ops/ElementwiseUnaryLayer.h"
30 #include "ops/ExpandDimsLayer.h"
31 #include "ops/FillLayer.h"
32 #include "ops/FullyConnectedLayer.h"
33 #include "ops/GatherLayer.h"
34 #include "ops/MeanLayer.h"
35 #include "ops/OneHotLayer.h"
36 #include "ops/OperationUtils.h"
37 #include "ops/PackLayer.h"
38 #include "ops/PadLayer.h"
39 #include "ops/PoolLayer.h"
40 #include "ops/PowLayer.h"
41 #include "ops/RangeLayer.h"
42 #include "ops/RankLayer.h"
43 #include "ops/ReduceLayer.h"
44 #include "ops/ReshapeLayer.h"
45 #include "ops/ResizeBilinearLayer.h"
46 #include "ops/ReverseLayer.h"
47 #include "ops/SelectLayer.h"
48 #include "ops/ShapeLayer.h"
49 #include "ops/SliceLayer.h"
50 #include "ops/SoftMaxLayer.h"
51 #include "ops/StridedSliceLayer.h"
52 #include "ops/SpaceToBatchNDLayer.h"
53 #include "ops/SpaceToDepthLayer.h"
54 #include "ops/SplitLayer.h"
55 #include "ops/SplitVLayer.h"
56 #include "ops/TileLayer.h"
57 #include "ops/TransposeLayer.h"
58 #include "ops/UnpackLayer.h"
59 #include "ops/SquaredDiffLayer.h"
60 #include "ops/L2NormLayer.h"
61 #include "ops/MatrixBandPartLayer.h"
62 #include "ops/BatchMatMulLayer.h"
63 #include "ops/BroadcastToLayer.h"
64 #include "ops/FusedBatchNormLayer.h"
65 #include "ops/LogSoftMaxLayer.h"
66 #include "ops/StatelessRandomUniformLayer.h"
68 #include <backend/Backend.h>
69 #include <backend/IConfig.h>
71 #include <util/Utils.h>
72 #include <util/logging.h>
73 #include <exec/DynamicShapeInference.h>
87 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
89 switch (arithmetic_type_ir)
91 case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
92 return ops::ArithmeticType::kAdd;
93 case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
94 return ops::ArithmeticType::kSub;
95 case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
96 return ops::ArithmeticType::kMul;
97 case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
98 return ops::ArithmeticType::kDiv;
100 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
104 ops::ElementwiseActivationType
105 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
109 case ir::operation::ElementwiseActivation::Type::LOGISTIC:
110 return ops::ElementwiseActivationType::kLogistic;
111 case ir::operation::ElementwiseActivation::Type::RELU:
112 return ops::ElementwiseActivationType::kReLU;
113 case ir::operation::ElementwiseActivation::Type::TANH:
114 return ops::ElementwiseActivationType::kTanh;
116 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
120 ops::ElementwiseBinaryType
121 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
125 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
126 return ops::ElementwiseBinaryType::kLogicalOr;
127 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
128 return ops::ElementwiseBinaryType::kMax;
129 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
130 return ops::ElementwiseBinaryType::kMin;
132 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
136 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
140 case ir::operation::ElementwiseUnary::Type::ABS:
141 return ops::ElementwiseUnaryType::kAbs;
142 case ir::operation::ElementwiseUnary::Type::CAST:
143 return ops::ElementwiseUnaryType::kCast;
144 case ir::operation::ElementwiseUnary::Type::COS:
145 return ops::ElementwiseUnaryType::kCos;
146 case ir::operation::ElementwiseUnary::Type::ERF:
147 return ops::ElementwiseUnaryType::kErf;
148 case ir::operation::ElementwiseUnary::Type::EXP:
149 return ops::ElementwiseUnaryType::kExp;
150 case ir::operation::ElementwiseUnary::Type::LOG:
151 return ops::ElementwiseUnaryType::kLog;
152 case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
153 return ops::ElementwiseUnaryType::kLogicalNot;
154 case ir::operation::ElementwiseUnary::Type::NEG:
155 return ops::ElementwiseUnaryType::kNeg;
156 case ir::operation::ElementwiseUnary::Type::QUANTIZE:
157 return ops::ElementwiseUnaryType::kQuantize;
158 case ir::operation::ElementwiseUnary::Type::ROUND:
159 return ops::ElementwiseUnaryType::kRound;
160 case ir::operation::ElementwiseUnary::Type::RSQRT:
161 return ops::ElementwiseUnaryType::kRSqrt;
162 case ir::operation::ElementwiseUnary::Type::SIN:
163 return ops::ElementwiseUnaryType::kSin;
164 case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
165 return ops::ElementwiseUnaryType::kZerosLike;
167 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
171 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
175 case ir::operation::Pool2D::PoolType::AVG:
176 return ops::PoolType::kAvg;
177 case ir::operation::Pool2D::PoolType::MAX:
178 return ops::PoolType::kMax;
180 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
184 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
186 switch (reduce_type_ir)
188 case ir::operation::Reduce::ReduceType::ALL:
189 return ops::ReduceType::kAll;
190 case ir::operation::Reduce::ReduceType::ANY:
191 return ops::ReduceType::kAny;
192 case ir::operation::Reduce::ReduceType::MAX:
193 return ops::ReduceType::kMax;
194 case ir::operation::Reduce::ReduceType::MIN:
195 return ops::ReduceType::kMin;
196 case ir::operation::Reduce::ReduceType::PROD:
197 return ops::ReduceType::kProd;
198 case ir::operation::Reduce::ReduceType::SUM:
199 return ops::ReduceType::kSum;
201 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
// Constructs the generator. Each argument is stored in the member of the matching name:
// operand context, operation context, tensor builder, tensor registry, custom kernel builder
// and external context. The current op-sequence layout starts as ir::Layout::UNKNOWN.
// NOTE(review): the constructor body is not visible in this listing - confirm it is empty.
206 KernelGenerator::KernelGenerator(
207 const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
208 const std::shared_ptr<TensorBuilder> &tensor_builder,
209 const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
210 const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
211 const std::shared_ptr<ExternalContext> &external_context)
212 : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
213 _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
214 _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
// Builds the exec::FunctionSequence for one op sequence and leaves it in _return_fn_seq.
// NOTE(review): this listing appears to have lost its short lines (braces and possibly short
// statements), so the exact nesting and any guards around the tensor lookups below cannot be
// confirmed from here - check against the full file before editing.
219 void KernelGenerator::visit(const ir::OpSequence &op_seq)
// No sequence may be pending from an earlier visit, and a dynamic tensor manager is required.
221 assert(!_return_fn_seq);
222 assert(_tensor_builder->dynamicTensorManager());
225 auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
227 _return_fn_seq = std::make_unique<exec::FunctionSequence>();
229 // Prepare to handle dynamic tensors later
230 auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
// The context keeps raw pointers to op_seq and _operations_ctx.
232 dyn_ctx->op_seq = &op_seq;
233 dyn_ctx->operations = &_operations_ctx;
234 dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
235 dyn_ctx->tensor_registry = _tensor_reg;
236 dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
238 _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
240 _return_fn_seq->enableDynamicShapeInferer(true);
// Remember the layout of this sequence; the per-operation visit() overloads read it.
242 _current_op_seq_layout = op_seq.getLayout();
243 for (const auto &operation_idx : op_seq.operations())
245 const auto &node = _operations_ctx.at(operation_idx);
// Append whatever releaseFunction() returns (presumably the kernel just generated for `node`).
247 _return_fn_seq->append(releaseFunction());
// Walk the node's inputs (UNDEFINED entries removed) followed by its outputs.
249 for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
251 auto portable_tensor = _tensor_reg->getPortableTensor(ind);
// Portable tensors are expected to use the NHWC layout.
254 assert(portable_tensor->layout() == ir::Layout::NHWC);
257 auto tensor = _tensor_reg->getNativeTensor(ind);
// Count one more use of the native tensor.
260 tensor->increase_ref();
266 void KernelGenerator::visit(const ir::operation::Conv2D &node)
268 using ir::operation::Conv2D;
270 const auto ofm_index{node.getOutputs().at(0)};
271 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
272 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
273 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
275 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
276 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
277 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
278 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
280 const auto stride = node.param().stride;
281 const auto activation = node.param().activation;
282 const auto param_padding = node.param().padding;
283 const auto dilation = node.param().dilation;
284 auto fn = std::make_unique<ops::ConvolutionLayer>();
286 if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
288 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
289 param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
290 stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
291 activation, ofm_tensor);
293 _return_fn = std::move(fn);
296 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
297 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
298 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
299 const auto &ker_shape = _ctx.at(ker_index).shape();
300 const auto ker_height = ker_shape.dim(1);
301 const auto ker_width = ker_shape.dim(2);
304 ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
305 dilation.width_factor, dilation.height_factor);
307 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
308 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
309 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
311 _return_fn = std::move(fn);
314 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
316 using ir::operation::DepthwiseConv2D;
318 const auto ofm_index{node.getOutputs().at(0)};
319 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
320 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
321 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
323 const auto stride = node.param().stride;
324 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
325 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
326 // Kernel format is [1, kernel_height, kernel_width, depth_out].
327 const auto &ker_shape = _ctx.at(ker_index).shape();
328 const auto ker_height = ker_shape.dim(1);
329 const auto ker_width = ker_shape.dim(2);
330 const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
331 ker_width, ker_height);
332 const auto multiplier = node.param().multiplier;
333 const auto activation = node.param().activation;
335 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
336 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
337 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
338 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
340 auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
342 fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
343 padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
346 _return_fn = std::move(fn);
349 void KernelGenerator::visit(const ir::operation::Concat &node)
351 const auto ofm_index{node.getOutputs().at(0)};
353 const auto rank = _ctx.at(ofm_index).shape().rank();
354 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
356 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
358 std::vector<const IPortableTensor *> input_tensors;
359 for (auto &ifm_idx : node.getInputs())
360 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
362 auto fn = std::make_unique<ops::ConcatLayer>();
364 fn->configure(input_tensors, axis, output_tensor);
366 _return_fn = std::move(fn);
369 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
371 const auto output_index{node.getOutputs().at(0)};
372 const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
373 const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
375 auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
376 auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
377 auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
379 auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
381 IPortableTensor *crops_alloc = nullptr;
382 const auto NNApiInputs = 2;
384 if (node.getInputs().size() != NNApiInputs)
386 const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
387 crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
390 fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
392 _return_fn = std::move(fn);
395 void KernelGenerator::visit(const ir::operation::Fill &node)
397 const auto output_index{node.getOutputs().at(0)};
398 const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
399 const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
401 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
402 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
403 auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
405 auto fn = std::make_unique<ops::FillLayer>();
407 fn->configure(input_tensor, value_tensor, output_tensor);
409 _return_fn = std::move(fn);
// Creates an ops::FullyConnectedLayer for a FullyConnected node and stores it in _return_fn.
// NOTE(review): this listing appears to be missing short lines - the declaration that the bias
// expression below initialises and the tail of the configure() call are not visible. Confirm
// both against the full file before editing.
412 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
414 using ir::operation::FullyConnected;
416 const auto output_index{node.getOutputs().at(0)};
417 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
418 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
419 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
420 const auto activation = node.param().activation;
422 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
423 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
424 auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
// The bias is optional: an undefined bias index yields a null tensor pointer.
426 bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
428 auto fn = std::make_unique<ops::FullyConnectedLayer>();
430 fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
433 _return_fn = std::move(fn);
436 void KernelGenerator::visit(const ir::operation::Reshape &node)
438 const auto output_index{node.getOutputs().at(0)};
439 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
441 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
442 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
444 // optional 2nd input
445 IPortableTensor *shape_tensor = nullptr;
447 if (node.getInputs().size() == 2)
449 const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
450 shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
453 auto fn = std::make_unique<ops::ReshapeLayer>();
455 fn->configure(input_tensor, shape_tensor, output_tensor);
456 _return_fn = std::move(fn);
459 void KernelGenerator::visit(const ir::operation::Squeeze &node)
461 const auto output_index{node.getOutputs().at(0)};
462 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
464 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
465 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
467 // Squeeze can share same kernel with reshape
468 auto fn = std::make_unique<ops::ReshapeLayer>();
470 fn->configure(input_tensor, nullptr, output_tensor);
472 _return_fn = std::move(fn);
475 void KernelGenerator::visit(const ir::operation::Softmax &node)
477 const auto output_index{node.getOutputs().at(0)};
478 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
480 const auto beta = node.param().beta;
482 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
483 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
485 auto fn = std::make_unique<ops::SoftMaxLayer>();
487 fn->configure(input_tensor, beta, output_tensor);
489 _return_fn = std::move(fn);
492 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
494 const auto ofm_index{node.getOutputs().at(0)};
495 const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
496 const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
498 const auto activation = node.param().activation;
500 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
501 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
502 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
504 auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
506 fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
507 convertArithmeticType(node.param().arithmetic_type));
509 _return_fn = std::move(fn);
512 void KernelGenerator::visit(const ir::operation::Comparison &node)
514 const auto ofm_index{node.getOutputs().at(0)};
515 const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
516 const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
518 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
519 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
520 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
522 auto comparison_type = node.param().comparison_type;
524 auto fn = std::make_unique<ops::CompareLayer>();
526 fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
528 _return_fn = std::move(fn);
531 void KernelGenerator::visit(const ir::operation::Gather &node)
533 const auto output_index{node.getOutputs().at(0)};
534 const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
535 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
537 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
538 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
539 auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
541 const auto backend_layout = output_tensor->layout();
542 UNUSED_RELEASE(backend_layout);
544 // NOTE The frontend layout and backend layout must be the same for this operation.
545 // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
546 // is not not efficient even if it works well. If so, it would be better to set the
547 // layout of these backend tensors to the same layout.
548 // There is also one thing we have to think about. This operation depends on the layout of
549 // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
550 // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
551 // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
552 assert(backend_layout == input_tensor->layout());
553 assert(backend_layout == indices_tensor->layout());
554 const auto &input_shape = _ctx.at(input_index).shape();
555 UNUSED_RELEASE(input_shape);
556 assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
558 const auto axis_raw = node.param().axis;
559 const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
561 auto fn = std::make_unique<ops::GatherLayer>();
563 fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
565 _return_fn = std::move(fn);
568 void KernelGenerator::visit(const ir::operation::OneHot &node)
570 const auto output_index{node.getOutputs().at(0)};
571 const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
572 const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
573 const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
574 const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
576 const auto axis = node.param().axis;
578 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
579 auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
580 auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
581 auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
582 auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
584 assert(indices_tensor->data_type() == OperandType::INT32);
585 assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
587 auto fn = std::make_unique<ops::OneHotLayer>();
589 fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
591 _return_fn = std::move(fn);
594 void KernelGenerator::visit(const ir::operation::Einsum &node)
596 const auto ofm_index{node.getOutputs().at(0)};
598 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
599 std::vector<const IPortableTensor *> input_tensors;
600 for (auto &ifm_idx : node.getInputs())
601 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
603 const auto equation = node.param().equation;
605 auto fn = std::make_unique<ops::EinsumLayer>();
607 fn->configure(input_tensors, equation, output_tensor);
609 _return_fn = std::move(fn);
612 void KernelGenerator::visit(const ir::operation::Custom &node)
614 auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
615 std::vector<custom::TypeInfo> &types,
616 std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
617 for (auto &idx : opSeq)
619 const auto &operand = _ctx.at(idx);
620 // TODO make sure using `_current_op_seq_layout` is correct for custom operations
621 types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
622 auto in_tensor = _tensor_reg->getPortableTensor(idx);
623 tensors.emplace_back(in_tensor);
627 backend::custom::CustomKernelConfigParams params{};
629 fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
630 fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
632 params.userdata = node.userdata().data;
633 params.userdata_size = node.userdata().size;
635 auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
637 _return_fn = std::move(fn);
640 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
642 const auto output_index{node.getOutputs().at(0)};
643 const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
645 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
646 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
648 auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
650 fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
651 convertElementwiseActivationType(node.param().op_type));
653 _return_fn = std::move(fn);
656 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
658 const auto output_index{node.getOutputs().at(0)};
659 const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
660 const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
662 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
663 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
664 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
666 auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
668 fn->configure(lhs_tensor, rhs_tensor, output_tensor,
669 convertElementwiseBinaryType(node.param().op_type));
671 _return_fn = std::move(fn);
674 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
676 const auto output_index{node.getOutputs().at(0)};
677 const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
679 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
680 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
682 auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
684 fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
686 _return_fn = std::move(fn);
689 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
691 const auto output_index{node.getOutputs().at(0)};
692 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
693 const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
695 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
696 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
697 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
699 auto fn = std::make_unique<ops::ExpandDimsLayer>();
701 fn->configure(input_tensor, axis_tensor, output_tensor);
703 _return_fn = std::move(fn);
706 void KernelGenerator::visit(const ir::operation::Pack &node)
708 const auto ofm_index{node.getOutputs().at(0)};
710 const auto rank = _ctx.at(ofm_index).shape().rank();
711 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
713 assert(-rank <= axis && axis < rank);
715 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
717 std::vector<const IPortableTensor *> input_tensors;
718 for (auto &ifm_idx : node.getInputs())
719 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
721 auto fn = std::make_unique<ops::PackLayer>();
723 fn->configure(input_tensors, axis, output_tensor);
725 _return_fn = std::move(fn);
728 void KernelGenerator::visit(const ir::operation::Unpack &node)
730 const auto input_index{node.getInputs().at(0)};
732 const auto rank = _ctx.at(input_index).shape().rank();
733 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
735 assert(rank == 0 || (-rank <= axis && axis < rank));
737 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
739 std::vector<IPortableTensor *> output_tensors;
740 for (auto &output_idx : node.getOutputs())
741 output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
743 auto fn = std::make_unique<ops::UnpackLayer>();
745 uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
747 fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
749 _return_fn = std::move(fn);
752 void KernelGenerator::visit(const ir::operation::Pad &node)
754 const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
755 const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
756 const auto output_index{node.getOutputs().at(0)};
757 assert(_ctx.at(pad_index).data());
759 auto input = _tensor_reg->getPortableTensor(input_index).get();
760 auto output = _tensor_reg->getPortableTensor(output_index).get();
761 auto pad_rank = _ctx.at(pad_index).shape().dim(0);
762 auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
764 auto fn = std::make_unique<ops::PadLayer>();
766 bool isPadV2 = node.getInputs().size() == 3 ? true : false;
767 const void *value = nullptr;
771 const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
772 value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
775 fn->configure(input, output, pad_base, pad_rank, value);
776 _return_fn = std::move(fn);
779 void KernelGenerator::visit(const ir::operation::Transpose &node)
781 const auto output_index{node.getOutputs().at(0)};
782 const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
784 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
785 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
787 auto fn = std::make_unique<ops::TransposeLayer>();
789 fn->configure(input_tensor, output_tensor, node.param().perm);
791 _return_fn = std::move(fn);
794 void KernelGenerator::visit(const ir::operation::Reduce &node)
796 const auto output_index{node.getOutputs().at(0)};
797 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
798 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
800 const auto keep_dims = node.param().keep_dims;
801 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
802 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
803 auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
805 if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
807 auto fn = std::make_unique<ops::MeanLayer>();
809 fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
811 _return_fn = std::move(fn);
815 auto fn = std::make_unique<ops::ReduceLayer>();
817 const auto reduce_type = convertReduceType(node.param().reduce_type);
818 fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
820 _return_fn = std::move(fn);
824 void KernelGenerator::visit(const ir::operation::Select &node)
826 const auto output_index{node.getOutputs().at(0)};
827 const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
828 const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
829 const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
831 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
832 auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
833 auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
834 auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
836 auto fn = std::make_unique<ops::SelectLayer>();
838 fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
840 _return_fn = std::move(fn);
843 void KernelGenerator::visit(const ir::operation::Slice &node)
845 const auto output_index{node.getOutputs().at(0)};
846 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
847 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
848 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
850 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
851 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
852 auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
853 auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
855 auto fn = std::make_unique<ops::SliceLayer>();
857 fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
859 _return_fn = std::move(fn);
862 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
864 const auto output_index{node.getOutputs().at(0)};
865 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
866 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
867 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
868 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
870 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
871 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
872 auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
873 auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
874 auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
876 auto begin_mask = node.param().begin_mask;
877 auto end_mask = node.param().end_mask;
878 auto shrink_axis_mask = node.param().shrink_axis_mask;
880 auto fn = std::make_unique<ops::StridedSliceLayer>();
882 fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
883 end_mask, shrink_axis_mask);
885 _return_fn = std::move(fn);
888 void KernelGenerator::visit(const ir::operation::Split &node)
890 const auto num_splits = node.param().num_splits;
891 assert(num_splits == static_cast<int>(node.getOutputs().size()));
893 const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
894 const auto rank = _ctx.at(input_idx).shape().rank();
895 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
896 auto axis_resolved = axis < 0 ? axis + rank : axis;
898 auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
900 std::vector<IPortableTensor *> out_tensors;
901 for (auto &output_idx : node.getOutputs())
902 out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
904 auto fn = std::make_unique<ops::SplitLayer>();
906 fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
908 _return_fn = std::move(fn);
911 void KernelGenerator::visit(const ir::operation::Shape &node)
913 const auto ofm_index{node.getOutputs().at(0)};
914 const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
916 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
917 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
919 auto fn = std::make_unique<ops::ShapeLayer>();
921 fn->configure(ifm_tensor, ofm_tensor);
923 _return_fn = std::move(fn);
926 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
928 const auto output_index{node.getOutputs().at(0)};
929 const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
931 auto output_height = node.param().height_out;
932 auto output_width = node.param().width_out;
933 auto align_corners = node.param().align_corners;
934 auto half_pixel_centers = node.param().half_pixel_centers;
936 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
937 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
939 auto fn = std::make_unique<ops::ResizeBilinearLayer>();
941 fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
944 _return_fn = std::move(fn);
947 void KernelGenerator::visit(const ir::operation::Reverse &node)
949 const auto output_index{node.getOutputs().at(0)};
950 const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
951 const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
953 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
954 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
955 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
957 auto fn = std::make_unique<ops::ReverseLayer>();
959 fn->configure(input_tensor, axis_tensor, output_tensor);
961 _return_fn = std::move(fn);
964 void KernelGenerator::visit(const ir::operation::ArgMax &node)
966 const auto output_index{node.getOutputs().at(0)};
967 const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
969 const auto axis = node.param().axis;
971 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
972 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
974 auto fn = std::make_unique<ops::ArgMinMaxLayer>();
976 fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
978 _return_fn = std::move(fn);
981 void KernelGenerator::visit(const ir::operation::Pool2D &node)
983 const auto ofm_index{node.getOutputs().at(0)};
984 const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
986 const auto kh = node.param().kh;
987 const auto kw = node.param().kw;
988 const auto stride = node.param().stride;
989 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
990 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
992 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
993 const auto activation = node.param().activation;
995 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
996 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
998 auto fn = std::make_unique<ops::PoolLayer>();
1000 fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1001 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1002 convertPoolType(node.param().op_type));
1004 _return_fn = std::move(fn);
1007 void KernelGenerator::visit(const ir::operation::Pow &node)
1009 const auto output_index{node.getOutputs().at(0)};
1010 const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1011 const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1013 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1014 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
1015 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
1017 auto fn = std::make_unique<ops::PowLayer>();
1019 fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1021 _return_fn = std::move(fn);
1024 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1026 const auto output_index{node.getOutputs().at(0)};
1027 const auto input_index{node.getInputs().at(0)};
1029 auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
1030 auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
1032 auto fn = std::make_unique<ops::L2NormLayer>();
1034 fn->configure(input_alloc, output_alloc);
1036 _return_fn = std::move(fn);
1039 void KernelGenerator::visit(const ir::operation::Range &node)
1041 const auto output_index{node.getOutputs().at(0)};
1042 const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1043 const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1044 const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1046 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1047 auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
1048 auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
1049 auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
1051 auto fn = std::make_unique<ops::RangeLayer>();
1053 fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1054 _return_fn = std::move(fn);
1057 void KernelGenerator::visit(const ir::operation::Rank &node)
1059 const auto ofm_index{node.getOutputs().at(0)};
1060 const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1062 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
1063 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
1065 auto fn = std::make_unique<ops::RankLayer>();
1067 fn->configure(ifm_tensor, ofm_tensor);
1069 _return_fn = std::move(fn);
1072 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1074 const auto ofm_index{node.getOutputs().at(0)};
1075 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1076 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1078 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
1079 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
1080 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
1082 auto fn = std::make_unique<ops::SqDiffLayer>();
1084 fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1085 _return_fn = std::move(fn);
1088 void KernelGenerator::visit(const ir::operation::Tile &node)
1090 const auto output_index{node.getOutputs().at(0)};
1091 const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1092 const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1094 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1095 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1096 auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
1098 auto fn = std::make_unique<ops::TileLayer>();
1100 fn->configure(input_tensor, multiples_tensor, output_tensor);
1101 _return_fn = std::move(fn);
1104 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1106 const auto output_index{node.getOutputs().at(0)};
1107 const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1108 const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1109 const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1111 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1112 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1113 auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
1114 auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
1116 auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1118 fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1119 _return_fn = std::move(fn);
1122 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1124 const auto output_index{node.getOutputs().at(0)};
1125 const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1126 const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1128 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1129 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
1130 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
1132 const auto adj_x = node.param().adj_x;
1133 const auto adj_y = node.param().adj_y;
1135 auto fn = std::make_unique<ops::BatchMatMulLayer>();
1137 fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1138 _return_fn = std::move(fn);
1141 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1143 const auto output_index{node.getOutputs().at(0)};
1144 const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1145 const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1147 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1148 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1149 auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
1151 auto fn = std::make_unique<ops::BroadcastToLayer>();
1153 fn->configure(input_tensor, shape_tensor, output_tensor);
1155 _return_fn = std::move(fn);
1158 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1160 const auto ofm_index{node.getOutputs().at(0)};
1162 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
1163 std::vector<const IPortableTensor *> input_tensors;
1164 for (auto &ifm_idx : node.getInputs())
1165 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
1167 const auto epsilon = node.param().epsilon;
1168 const auto is_training = node.param().is_training;
1169 const auto data_format = node.param().data_format;
1171 auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1173 fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1175 _return_fn = std::move(fn);
1178 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1180 const auto output_index{node.getOutputs().at(0)};
1181 const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1183 const auto beta = node.param().beta;
1184 const auto axis = node.param().axis;
1186 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1187 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1189 auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1191 fn->configure(input_tensor, beta, axis, output_tensor);
1193 _return_fn = std::move(fn);
1196 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1198 const auto output_index{node.getOutputs().at(0)};
1199 const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1200 const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1201 const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1203 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1204 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1205 auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
1206 auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
1208 auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1210 fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1212 _return_fn = std::move(fn);
1215 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1217 const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1218 const auto output_index{node.getOutputs().at(0)};
1219 auto block_size = node.param().block_size;
1221 auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
1222 auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
1224 auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1226 fn->configure(input_tensor, block_size, output_tensor);
1227 _return_fn = std::move(fn);
1230 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1232 const auto output_index{node.getOutputs().at(0)};
1233 const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1234 const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1236 auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
1237 auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
1238 auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
1240 auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1242 fn->configure(shape_alloc, seed_alloc, output_alloc);
1243 _return_fn = std::move(fn);
1246 void KernelGenerator::visit(const ir::operation::SplitV &node)
1248 const auto num_splits = node.param().num_splits;
1249 assert(num_splits == static_cast<int>(node.getOutputs().size()));
1251 const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1252 const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1253 const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1255 auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
1256 auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
1257 auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
1259 std::vector<IPortableTensor *> out_tensors;
1260 for (auto &output_idx : node.getOutputs())
1261 out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
1263 auto fn = std::make_unique<ops::SplitVLayer>();
1265 fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1267 _return_fn = std::move(fn);
1271 } // namespace backend
1272 } // namespace onert