2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "KernelGenerator.h"
19 #include "ops/AddNLayer.h"
20 #include "ops/ArgMinMaxLayer.h"
21 #include "ops/BatchToSpaceNDLayer.h"
22 #include "ops/BinaryArithmeticLayer.h"
23 #include "ops/CompareLayer.h"
24 #include "ops/ConcatLayer.h"
25 #include "ops/ConvolutionLayer.h"
26 #include "ops/DepthwiseConvolutionLayer.h"
27 #include "ops/EinsumLayer.h"
28 #include "ops/ElementwiseActivationLayer.h"
29 #include "ops/ElementwiseBinaryLayer.h"
30 #include "ops/ElementwiseUnaryLayer.h"
31 #include "ops/ExpandDimsLayer.h"
32 #include "ops/FillLayer.h"
33 #include "ops/FullyConnectedLayer.h"
34 #include "ops/GatherLayer.h"
35 #include "ops/LSTMLayer.h"
36 #include "ops/MeanLayer.h"
37 #include "ops/OneHotLayer.h"
38 #include "ops/OperationUtils.h"
39 #include "ops/PackLayer.h"
40 #include "ops/PadLayer.h"
41 #include "ops/PoolLayer.h"
42 #include "ops/PowLayer.h"
43 #include "ops/RangeLayer.h"
44 #include "ops/RankLayer.h"
45 #include "ops/ReduceLayer.h"
46 #include "ops/ReshapeLayer.h"
47 #include "ops/ResizeBilinearLayer.h"
48 #include "ops/ReverseLayer.h"
49 #include "ops/SelectLayer.h"
50 #include "ops/ShapeLayer.h"
51 #include "ops/SliceLayer.h"
52 #include "ops/SoftMaxLayer.h"
53 #include "ops/StridedSliceLayer.h"
54 #include "ops/SpaceToBatchNDLayer.h"
55 #include "ops/SpaceToDepthLayer.h"
56 #include "ops/SplitLayer.h"
57 #include "ops/SplitVLayer.h"
58 #include "ops/TileLayer.h"
59 #include "ops/TransposeLayer.h"
60 #include "ops/UnpackLayer.h"
61 #include "ops/SquaredDiffLayer.h"
62 #include "ops/L2NormLayer.h"
63 #include "ops/MatrixBandPartLayer.h"
64 #include "ops/BatchMatMulLayer.h"
65 #include "ops/BroadcastToLayer.h"
66 #include "ops/FusedBatchNormLayer.h"
67 #include "ops/LogSoftMaxLayer.h"
68 #include "ops/StatelessRandomUniformLayer.h"
70 #include <backend/Backend.h>
71 #include <backend/IConfig.h>
73 #include <util/Utils.h>
74 #include <util/logging.h>
75 #include <exec/DynamicShapeInferer.h>
89 convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
91 switch (arithmetic_type_ir)
93 case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
94 return ops::ArithmeticType::kAdd;
95 case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
96 return ops::ArithmeticType::kSub;
97 case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
98 return ops::ArithmeticType::kMul;
99 case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
100 return ops::ArithmeticType::kDiv;
102 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
106 ops::ElementwiseActivationType
107 convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
111 case ir::operation::ElementwiseActivation::Type::LOGISTIC:
112 return ops::ElementwiseActivationType::kLogistic;
113 case ir::operation::ElementwiseActivation::Type::RELU:
114 return ops::ElementwiseActivationType::kReLU;
115 case ir::operation::ElementwiseActivation::Type::TANH:
116 return ops::ElementwiseActivationType::kTanh;
118 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
122 ops::ElementwiseBinaryType
123 convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
127 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
128 return ops::ElementwiseBinaryType::kLogicalOr;
129 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
130 return ops::ElementwiseBinaryType::kMax;
131 case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
132 return ops::ElementwiseBinaryType::kMin;
134 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
138 ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
142 case ir::operation::ElementwiseUnary::Type::ABS:
143 return ops::ElementwiseUnaryType::kAbs;
144 case ir::operation::ElementwiseUnary::Type::CAST:
145 return ops::ElementwiseUnaryType::kCast;
146 case ir::operation::ElementwiseUnary::Type::COS:
147 return ops::ElementwiseUnaryType::kCos;
148 case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
149 return ops::ElementwiseUnaryType::kDequantize;
150 case ir::operation::ElementwiseUnary::Type::ERF:
151 return ops::ElementwiseUnaryType::kErf;
152 case ir::operation::ElementwiseUnary::Type::EXP:
153 return ops::ElementwiseUnaryType::kExp;
154 case ir::operation::ElementwiseUnary::Type::FLOOR:
155 return ops::ElementwiseUnaryType::kFloor;
156 case ir::operation::ElementwiseUnary::Type::LOG:
157 return ops::ElementwiseUnaryType::kLog;
158 case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
159 return ops::ElementwiseUnaryType::kLogicalNot;
160 case ir::operation::ElementwiseUnary::Type::NEG:
161 return ops::ElementwiseUnaryType::kNeg;
162 case ir::operation::ElementwiseUnary::Type::QUANTIZE:
163 return ops::ElementwiseUnaryType::kQuantize;
164 case ir::operation::ElementwiseUnary::Type::ROUND:
165 return ops::ElementwiseUnaryType::kRound;
166 case ir::operation::ElementwiseUnary::Type::RSQRT:
167 return ops::ElementwiseUnaryType::kRSqrt;
168 case ir::operation::ElementwiseUnary::Type::SIN:
169 return ops::ElementwiseUnaryType::kSin;
170 case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
171 return ops::ElementwiseUnaryType::kZerosLike;
173 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
177 ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
181 case ir::operation::Pool2D::PoolType::AVG:
182 return ops::PoolType::kAvg;
183 case ir::operation::Pool2D::PoolType::MAX:
184 return ops::PoolType::kMax;
186 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
190 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
192 switch (reduce_type_ir)
194 case ir::operation::Reduce::ReduceType::ALL:
195 return ops::ReduceType::kAll;
196 case ir::operation::Reduce::ReduceType::ANY:
197 return ops::ReduceType::kAny;
198 case ir::operation::Reduce::ReduceType::MAX:
199 return ops::ReduceType::kMax;
200 case ir::operation::Reduce::ReduceType::MIN:
201 return ops::ReduceType::kMin;
202 case ir::operation::Reduce::ReduceType::PROD:
203 return ops::ReduceType::kProd;
204 case ir::operation::Reduce::ReduceType::SUM:
205 return ops::ReduceType::kSum;
207 throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
212 KernelGenerator::KernelGenerator(
213 const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
214 const std::shared_ptr<TensorBuilder> &tensor_builder,
215 const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
216 const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
217 const std::shared_ptr<ExternalContext> &external_context)
218 : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
219 _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
220 _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
225 void KernelGenerator::visit(const ir::operation::AddN &node)
227 const auto output_index{node.getOutputs().at(0)};
229 std::vector<const IPortableTensor *> input_tensors;
230 for (auto &input_idx : node.getInputs())
231 input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
233 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
235 auto fn = std::make_unique<ops::AddNLayer>();
237 fn->configure(std::move(input_tensors), output_tensor);
239 _return_fn = std::move(fn);
242 void KernelGenerator::visit(const ir::OpSequence &op_seq)
244 assert(!_return_fn_seq);
245 assert(_tensor_builder->dynamicTensorManager());
248 auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
250 _return_fn_seq = std::make_unique<exec::FunctionSequence>();
252 // Prepare to handle dynamic tensors later
253 auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
255 dyn_ctx->op_seq = &op_seq;
256 dyn_ctx->operations = &_operations_ctx;
257 dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
258 dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
260 _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
263 _current_op_seq_layout = op_seq.getLayout();
264 for (const auto &operation_idx : op_seq.operations())
266 const auto &node = _operations_ctx.at(operation_idx);
268 _return_fn_seq->append(releaseFunction());
270 for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
272 auto portable_tensor = _tensor_reg->getPortableTensor(ind);
275 assert(portable_tensor->layout() == ir::Layout::NHWC);
278 auto tensor = _tensor_reg->getNativeTensor(ind);
281 tensor->increase_ref();
287 void KernelGenerator::visit(const ir::operation::Conv2D &node)
289 using ir::operation::Conv2D;
291 const auto ofm_index{node.getOutputs().at(0)};
292 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
293 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
294 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
296 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
297 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
298 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
299 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
301 const auto stride = node.param().stride;
302 const auto activation = node.param().activation;
303 const auto param_padding = node.param().padding;
304 const auto dilation = node.param().dilation;
305 auto fn = std::make_unique<ops::ConvolutionLayer>();
307 if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
309 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
310 param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
311 stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
312 activation, ofm_tensor);
314 _return_fn = std::move(fn);
317 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
318 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
319 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
320 const auto &ker_shape = _ctx.at(ker_index).shape();
321 const auto ker_height = ker_shape.dim(1);
322 const auto ker_width = ker_shape.dim(2);
325 ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
326 dilation.width_factor, dilation.height_factor);
328 fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
329 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
330 dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
332 _return_fn = std::move(fn);
335 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
337 using ir::operation::DepthwiseConv2D;
339 const auto ofm_index{node.getOutputs().at(0)};
340 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
341 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
342 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
344 const auto stride = node.param().stride;
345 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
346 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
347 // Kernel format is [1, kernel_height, kernel_width, depth_out].
348 const auto &ker_shape = _ctx.at(ker_index).shape();
349 const auto ker_height = ker_shape.dim(1);
350 const auto ker_width = ker_shape.dim(2);
351 const auto dilation_width = node.param().dilation.width_factor;
352 const auto dilation_height = node.param().dilation.height_factor;
353 const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
354 ker_width, ker_height, dilation_width, dilation_height);
355 const auto multiplier = node.param().multiplier;
356 const auto activation = node.param().activation;
358 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
359 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
360 auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
361 auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
363 auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
365 fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
366 padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width,
367 dilation_height, activation, ofm_tensor);
369 _return_fn = std::move(fn);
372 void KernelGenerator::visit(const ir::operation::Concat &node)
374 const auto ofm_index{node.getOutputs().at(0)};
376 const auto rank = _ctx.at(ofm_index).shape().rank();
377 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
379 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
381 std::vector<const IPortableTensor *> input_tensors;
382 for (auto &ifm_idx : node.getInputs())
383 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
385 auto fn = std::make_unique<ops::ConcatLayer>();
387 fn->configure(input_tensors, axis, output_tensor);
389 _return_fn = std::move(fn);
392 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
394 const auto output_index{node.getOutputs().at(0)};
395 const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
396 const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
398 auto output_alloc = _tensor_reg->getPortableTensor(output_index);
399 auto input_alloc = _tensor_reg->getPortableTensor(input_index);
400 auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index);
402 auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
404 IPortableTensor *crops_alloc = nullptr;
405 const auto NNApiInputs = 2;
407 if (node.getInputs().size() != NNApiInputs)
409 const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
410 crops_alloc = _tensor_reg->getPortableTensor(crops_data_index);
413 fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
415 _return_fn = std::move(fn);
418 void KernelGenerator::visit(const ir::operation::Fill &node)
420 const auto output_index{node.getOutputs().at(0)};
421 const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
422 const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
424 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
425 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
426 auto value_tensor = _tensor_reg->getPortableTensor(value_index);
428 auto fn = std::make_unique<ops::FillLayer>();
430 fn->configure(input_tensor, value_tensor, output_tensor);
432 _return_fn = std::move(fn);
435 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
437 using ir::operation::FullyConnected;
439 const auto output_index{node.getOutputs().at(0)};
440 const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
441 const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
442 const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
443 const auto activation = node.param().activation;
444 const auto weights_format = node.param().weights_format;
446 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
447 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
448 auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
449 auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
451 auto fn = std::make_unique<ops::FullyConnectedLayer>();
453 fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor,
456 _return_fn = std::move(fn);
459 void KernelGenerator::visit(const ir::operation::Reshape &node)
461 const auto output_index{node.getOutputs().at(0)};
462 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
464 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
465 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
467 // optional 2nd input
468 IPortableTensor *shape_tensor = nullptr;
470 if (node.getInputs().size() == 2)
472 const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
473 shape_tensor = _tensor_reg->getPortableTensor(shape_index);
476 auto fn = std::make_unique<ops::ReshapeLayer>();
478 fn->configure(input_tensor, shape_tensor, output_tensor);
479 _return_fn = std::move(fn);
482 void KernelGenerator::visit(const ir::operation::Squeeze &node)
484 const auto output_index{node.getOutputs().at(0)};
485 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
487 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
488 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
490 // Squeeze can share same kernel with reshape
491 auto fn = std::make_unique<ops::ReshapeLayer>();
493 fn->configure(input_tensor, nullptr, output_tensor);
495 _return_fn = std::move(fn);
498 void KernelGenerator::visit(const ir::operation::Softmax &node)
500 const auto output_index{node.getOutputs().at(0)};
501 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
503 const auto beta = node.param().beta;
505 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
506 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
508 auto fn = std::make_unique<ops::SoftMaxLayer>();
510 fn->configure(input_tensor, beta, output_tensor);
512 _return_fn = std::move(fn);
515 void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
517 const auto ofm_index{node.getOutputs().at(0)};
518 const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
519 const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
521 const auto activation = node.param().activation;
523 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
524 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
525 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
527 auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
529 fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
530 convertArithmeticType(node.param().arithmetic_type));
532 _return_fn = std::move(fn);
535 void KernelGenerator::visit(const ir::operation::Comparison &node)
537 const auto ofm_index{node.getOutputs().at(0)};
538 const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
539 const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
541 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
542 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
543 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
545 auto comparison_type = node.param().comparison_type;
547 auto fn = std::make_unique<ops::CompareLayer>();
549 fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
551 _return_fn = std::move(fn);
554 void KernelGenerator::visit(const ir::operation::Gather &node)
556 const auto output_index{node.getOutputs().at(0)};
557 const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
558 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
560 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
561 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
562 auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
564 const auto backend_layout = output_tensor->layout();
565 UNUSED_RELEASE(backend_layout);
567 // NOTE The frontend layout and backend layout must be the same for this operation.
568 // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
569 // is not not efficient even if it works well. If so, it would be better to set the
570 // layout of these backend tensors to the same layout.
571 // There is also one thing we have to think about. This operation depends on the layout of
572 // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
573 // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
574 // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
575 assert(backend_layout == input_tensor->layout());
576 assert(backend_layout == indices_tensor->layout());
577 const auto &input_shape = _ctx.at(input_index).shape();
578 UNUSED_RELEASE(input_shape);
579 assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
581 const auto axis_raw = node.param().axis;
582 const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
584 auto fn = std::make_unique<ops::GatherLayer>();
586 fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
588 _return_fn = std::move(fn);
591 void KernelGenerator::visit(const ir::operation::OneHot &node)
593 const auto output_index{node.getOutputs().at(0)};
594 const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
595 const auto depth_index{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
596 const auto onvalue_index{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
597 const auto offvalue_index{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
599 const auto axis = node.param().axis;
601 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
602 auto indices_tensor = _tensor_reg->getPortableTensor(indices_index);
603 auto depth_tensor = _tensor_reg->getPortableTensor(depth_index);
604 auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index);
605 auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index);
607 assert(indices_tensor->data_type() == OperandType::INT32);
608 assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
610 auto fn = std::make_unique<ops::OneHotLayer>();
612 fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
614 _return_fn = std::move(fn);
617 void KernelGenerator::visit(const ir::operation::Einsum &node)
619 const auto ofm_index{node.getOutputs().at(0)};
621 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
622 std::vector<const IPortableTensor *> input_tensors;
623 for (auto &ifm_idx : node.getInputs())
624 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
626 const auto equation = node.param().equation;
628 auto fn = std::make_unique<ops::EinsumLayer>();
630 fn->configure(input_tensors, equation, output_tensor);
632 _return_fn = std::move(fn);
635 void KernelGenerator::visit(const ir::operation::Custom &node)
637 auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
638 std::vector<custom::TypeInfo> &types,
639 std::vector<IPortableTensor *> &tensors) {
640 for (auto &idx : opSeq)
642 const auto &operand = _ctx.at(idx);
643 // TODO make sure using `_current_op_seq_layout` is correct for custom operations
644 types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
645 auto in_tensor = _tensor_reg->getPortableTensor(idx);
646 tensors.emplace_back(in_tensor);
650 backend::custom::CustomKernelConfigParams params{};
652 fill_op_info(node.getInputs(), params.input_types, params.input_tensors);
653 fill_op_info(node.getOutputs(), params.output_types, params.output_tensors);
655 params.userdata = node.userdata().data;
656 params.userdata_size = node.userdata().size;
658 auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
660 _return_fn = std::move(fn);
663 void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
665 const auto output_index{node.getOutputs().at(0)};
666 const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
668 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
669 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
671 auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
673 fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
674 convertElementwiseActivationType(node.param().op_type));
676 _return_fn = std::move(fn);
679 void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
681 const auto output_index{node.getOutputs().at(0)};
682 const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
683 const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
685 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
686 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
687 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
689 auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
691 fn->configure(lhs_tensor, rhs_tensor, output_tensor,
692 convertElementwiseBinaryType(node.param().op_type));
694 _return_fn = std::move(fn);
697 void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
699 const auto output_index{node.getOutputs().at(0)};
700 const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
702 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
703 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
705 auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
707 fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
709 _return_fn = std::move(fn);
712 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
714 const auto output_index{node.getOutputs().at(0)};
715 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
716 const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
718 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
719 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
720 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
722 auto fn = std::make_unique<ops::ExpandDimsLayer>();
724 fn->configure(input_tensor, axis_tensor, output_tensor);
726 _return_fn = std::move(fn);
729 void KernelGenerator::visit(const ir::operation::Pack &node)
731 const auto ofm_index{node.getOutputs().at(0)};
733 const auto rank = _ctx.at(ofm_index).shape().rank();
734 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
736 assert(-rank <= axis && axis < rank);
738 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
740 std::vector<const IPortableTensor *> input_tensors;
741 for (auto &ifm_idx : node.getInputs())
742 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
744 auto fn = std::make_unique<ops::PackLayer>();
746 fn->configure(input_tensors, axis, output_tensor);
748 _return_fn = std::move(fn);
751 void KernelGenerator::visit(const ir::operation::Unpack &node)
753 const auto input_index{node.getInputs().at(0)};
755 const auto rank = _ctx.at(input_index).shape().rank();
756 const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
758 assert(rank == 0 || (-rank <= axis && axis < rank));
760 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
762 std::vector<IPortableTensor *> output_tensors;
763 for (auto &output_idx : node.getOutputs())
764 output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
766 auto fn = std::make_unique<ops::UnpackLayer>();
768 uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
770 fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
772 _return_fn = std::move(fn);
775 void KernelGenerator::visit(const ir::operation::Pad &node)
777 const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
778 const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
779 const auto output_index{node.getOutputs().at(0)};
780 assert(_ctx.at(pad_index).data());
782 auto input = _tensor_reg->getPortableTensor(input_index);
783 auto output = _tensor_reg->getPortableTensor(output_index);
784 auto pad_rank = _ctx.at(pad_index).shape().dim(0);
785 auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
787 auto fn = std::make_unique<ops::PadLayer>();
789 bool isPadV2 = node.getInputs().size() == 3 ? true : false;
790 const void *value = nullptr;
794 const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
795 value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
798 fn->configure(input, output, pad_base, pad_rank, value);
799 _return_fn = std::move(fn);
802 void KernelGenerator::visit(const ir::operation::Transpose &node)
804 const auto output_index{node.getOutputs().at(0)};
805 const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
806 const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
808 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
809 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
810 auto perm_tensor = _tensor_reg->getPortableTensor(perm_index);
812 auto fn = std::make_unique<ops::TransposeLayer>();
814 fn->configure(input_tensor, perm_tensor, output_tensor);
816 _return_fn = std::move(fn);
819 void KernelGenerator::visit(const ir::operation::Reduce &node)
821 const auto output_index{node.getOutputs().at(0)};
822 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
823 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
825 const auto keep_dims = node.param().keep_dims;
826 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
827 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
828 auto axes_tensor = _tensor_reg->getPortableTensor(axes_index);
830 if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
832 auto fn = std::make_unique<ops::MeanLayer>();
834 fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
836 _return_fn = std::move(fn);
840 auto fn = std::make_unique<ops::ReduceLayer>();
842 const auto reduce_type = convertReduceType(node.param().reduce_type);
843 fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
845 _return_fn = std::move(fn);
849 void KernelGenerator::visit(const ir::operation::Select &node)
851 const auto output_index{node.getOutputs().at(0)};
852 const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
853 const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
854 const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
856 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
857 auto condition_tensor = _tensor_reg->getPortableTensor(condition_index);
858 auto true_tensor = _tensor_reg->getPortableTensor(true_index);
859 auto false_tensor = _tensor_reg->getPortableTensor(false_index);
861 auto fn = std::make_unique<ops::SelectLayer>();
863 fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
865 _return_fn = std::move(fn);
868 void KernelGenerator::visit(const ir::operation::Slice &node)
870 const auto output_index{node.getOutputs().at(0)};
871 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
872 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
873 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
875 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
876 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
877 auto begins_tensor = _tensor_reg->getPortableTensor(begins_index);
878 auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index);
880 auto fn = std::make_unique<ops::SliceLayer>();
882 fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
884 _return_fn = std::move(fn);
887 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
889 const auto output_index{node.getOutputs().at(0)};
890 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
891 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
892 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
893 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
895 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
896 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
897 auto starts_tensor = _tensor_reg->getPortableTensor(starts_index);
898 auto ends_tensor = _tensor_reg->getPortableTensor(ends_index);
899 auto strides_tensor = _tensor_reg->getPortableTensor(strides_index);
901 auto begin_mask = node.param().begin_mask;
902 auto end_mask = node.param().end_mask;
903 auto shrink_axis_mask = node.param().shrink_axis_mask;
905 auto fn = std::make_unique<ops::StridedSliceLayer>();
907 fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
908 end_mask, shrink_axis_mask);
910 _return_fn = std::move(fn);
913 void KernelGenerator::visit(const ir::operation::Split &node)
915 const auto num_splits = node.param().num_splits;
916 assert(num_splits == static_cast<int>(node.getOutputs().size()));
918 const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
919 const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
921 auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
922 auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
924 std::vector<IPortableTensor *> out_tensors;
925 for (auto &output_idx : node.getOutputs())
926 out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
928 auto fn = std::make_unique<ops::SplitLayer>();
930 fn->configure(in_tensor, axis_tensor, num_splits, out_tensors);
932 _return_fn = std::move(fn);
935 void KernelGenerator::visit(const ir::operation::Shape &node)
937 const auto ofm_index{node.getOutputs().at(0)};
938 const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
940 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
941 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
943 auto fn = std::make_unique<ops::ShapeLayer>();
945 fn->configure(ifm_tensor, ofm_tensor);
947 _return_fn = std::move(fn);
950 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
952 const auto output_index{node.getOutputs().at(0)};
953 const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
955 auto align_corners = node.param().align_corners;
956 auto half_pixel_centers = node.param().half_pixel_centers;
958 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
959 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
961 auto fn = std::make_unique<ops::ResizeBilinearLayer>();
963 if (node.getInputs().size() == 1)
965 fn->configure(input_tensor, output_tensor, node.param().height_out, node.param().width_out,
966 align_corners, half_pixel_centers);
970 assert(node.getInputs().size() == 2);
971 const auto size_index{node.getInputs().at(ir::operation::ResizeBilinear::SIZE)};
972 auto size_tensor = _tensor_reg->getPortableTensor(size_index);
973 if (size_tensor->is_constant())
975 auto size_vec = _ctx.at(size_index).asVector<int32_t>();
976 const auto height_out = size_vec[0];
977 const auto width_out = size_vec[1];
978 fn->configure(input_tensor, output_tensor, height_out, width_out, align_corners,
983 fn->configure(input_tensor, output_tensor, size_tensor, align_corners, half_pixel_centers);
987 _return_fn = std::move(fn);
990 void KernelGenerator::visit(const ir::operation::Reverse &node)
992 const auto output_index{node.getOutputs().at(0)};
993 const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
994 const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
996 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
997 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
998 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1000 auto fn = std::make_unique<ops::ReverseLayer>();
1002 fn->configure(input_tensor, axis_tensor, output_tensor);
1004 _return_fn = std::move(fn);
1007 void KernelGenerator::visit(const ir::operation::ArgMax &node)
1009 const auto output_index{node.getOutputs().at(0)};
1010 const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)};
1011 const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)};
1013 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1014 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1015 auto axis_tensor = _tensor_reg->getPortableTensor(axis_index);
1017 auto fn = std::make_unique<ops::ArgMinMaxLayer>();
1019 fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true);
1021 _return_fn = std::move(fn);
1024 void KernelGenerator::visit(const ir::operation::Pool2D &node)
1026 const auto ofm_index{node.getOutputs().at(0)};
1027 const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
1029 const auto kh = node.param().kh;
1030 const auto kw = node.param().kw;
1031 const auto stride = node.param().stride;
1032 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
1033 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
1034 const auto padding =
1035 ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
1036 const auto activation = node.param().activation;
1038 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1039 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1041 auto fn = std::make_unique<ops::PoolLayer>();
1043 fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
1044 stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
1045 convertPoolType(node.param().op_type));
1047 _return_fn = std::move(fn);
1050 void KernelGenerator::visit(const ir::operation::Pow &node)
1052 const auto output_index{node.getOutputs().at(0)};
1053 const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
1054 const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
1056 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1057 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1058 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1060 auto fn = std::make_unique<ops::PowLayer>();
1062 fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
1064 _return_fn = std::move(fn);
1067 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1069 const auto output_index{node.getOutputs().at(0)};
1070 const auto input_index{node.getInputs().at(0)};
1072 auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1073 auto input_alloc = _tensor_reg->getPortableTensor(input_index);
1075 auto fn = std::make_unique<ops::L2NormLayer>();
1077 fn->configure(input_alloc, output_alloc);
1079 _return_fn = std::move(fn);
1082 void KernelGenerator::visit(const ir::operation::Range &node)
1084 const auto output_index{node.getOutputs().at(0)};
1085 const auto start_index{node.getInputs().at(ir::operation::Range::START)};
1086 const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
1087 const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
1089 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1090 auto start_tensor = _tensor_reg->getPortableTensor(start_index);
1091 auto limit_tensor = _tensor_reg->getPortableTensor(limit_index);
1092 auto delta_tensor = _tensor_reg->getPortableTensor(delta_index);
1094 auto fn = std::make_unique<ops::RangeLayer>();
1096 fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
1097 _return_fn = std::move(fn);
1100 void KernelGenerator::visit(const ir::operation::Rank &node)
1102 const auto ofm_index{node.getOutputs().at(0)};
1103 const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
1105 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1106 auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
1108 auto fn = std::make_unique<ops::RankLayer>();
1110 fn->configure(ifm_tensor, ofm_tensor);
1112 _return_fn = std::move(fn);
1115 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1117 const auto ofm_index{node.getOutputs().at(0)};
1118 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1119 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1121 auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
1122 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1123 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1125 auto fn = std::make_unique<ops::SqDiffLayer>();
1127 fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
1128 _return_fn = std::move(fn);
1131 void KernelGenerator::visit(const ir::operation::Tile &node)
1133 const auto output_index{node.getOutputs().at(0)};
1134 const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
1135 const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
1137 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1138 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1139 auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index);
1141 auto fn = std::make_unique<ops::TileLayer>();
1143 fn->configure(input_tensor, multiples_tensor, output_tensor);
1144 _return_fn = std::move(fn);
1147 void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
1149 const auto output_index{node.getOutputs().at(0)};
1150 const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::INPUT)};
1151 const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
1152 const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
1154 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1155 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1156 auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index);
1157 auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index);
1159 auto fn = std::make_unique<ops::MatrixBandPartLayer>();
1161 fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
1162 _return_fn = std::move(fn);
1165 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
1167 const auto output_index{node.getOutputs().at(0)};
1168 const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
1169 const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
1171 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1172 auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index);
1173 auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index);
1175 const auto adj_x = node.param().adj_x;
1176 const auto adj_y = node.param().adj_y;
1178 auto fn = std::make_unique<ops::BatchMatMulLayer>();
1180 fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
1181 _return_fn = std::move(fn);
1184 void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
1186 const auto output_index{node.getOutputs().at(0)};
1187 const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
1188 const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
1190 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1191 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1192 auto shape_tensor = _tensor_reg->getPortableTensor(shape_index);
1194 auto fn = std::make_unique<ops::BroadcastToLayer>();
1196 fn->configure(input_tensor, shape_tensor, output_tensor);
1198 _return_fn = std::move(fn);
1201 void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
1203 const auto ofm_index{node.getOutputs().at(0)};
1205 auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
1206 std::vector<const IPortableTensor *> input_tensors;
1207 for (auto &ifm_idx : node.getInputs())
1208 input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
1210 const auto epsilon = node.param().epsilon;
1211 const auto is_training = node.param().is_training;
1212 const auto data_format = node.param().data_format;
1214 auto fn = std::make_unique<ops::FusedBatchNormLayer>();
1216 fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
1218 _return_fn = std::move(fn);
1221 void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
1223 const auto output_index{node.getOutputs().at(0)};
1224 const auto input_index{node.getInputs().at(ir::operation::LogSoftmax::Input::INPUT)};
1226 const auto beta = node.param().beta;
1227 const auto axis = node.param().axis;
1229 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1230 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1232 auto fn = std::make_unique<ops::LogSoftMaxLayer>();
1234 fn->configure(input_tensor, beta, axis, output_tensor);
1236 _return_fn = std::move(fn);
1239 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1241 const auto output_index{node.getOutputs().at(0)};
1242 const auto input_index{node.getInputs().at(ir::operation::SpaceToBatchND::INPUT)};
1243 const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
1244 const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
1246 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1247 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1248 auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index);
1249 auto padding_tensor = _tensor_reg->getPortableTensor(padding_index);
1251 auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
1253 fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
1255 _return_fn = std::move(fn);
1258 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1260 const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1261 const auto output_index{node.getOutputs().at(0)};
1262 auto block_size = node.param().block_size;
1264 auto input_tensor = _tensor_reg->getPortableTensor(input_index);
1265 auto output_tensor = _tensor_reg->getPortableTensor(output_index);
1267 auto fn = std::make_unique<ops::SpaceToDepthLayer>();
1269 fn->configure(input_tensor, block_size, output_tensor);
1270 _return_fn = std::move(fn);
1273 void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
1275 const auto output_index{node.getOutputs().at(0)};
1276 const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
1277 const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
1279 auto output_alloc = _tensor_reg->getPortableTensor(output_index);
1280 auto shape_alloc = _tensor_reg->getPortableTensor(shape_index);
1281 auto seed_alloc = _tensor_reg->getPortableTensor(seed_index);
1283 auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
1285 fn->configure(shape_alloc, seed_alloc, output_alloc);
1286 _return_fn = std::move(fn);
1289 void KernelGenerator::visit(const ir::operation::SplitV &node)
1291 const auto num_splits = node.param().num_splits;
1292 assert(num_splits == static_cast<int>(node.getOutputs().size()));
1294 const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
1295 const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
1296 const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
1298 auto in_tensor = _tensor_reg->getPortableTensor(input_idx);
1299 auto in_size_splits = _tensor_reg->getPortableTensor(size_splits);
1300 auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
1302 std::vector<IPortableTensor *> out_tensors;
1303 for (auto &output_idx : node.getOutputs())
1304 out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
1306 auto fn = std::make_unique<ops::SplitVLayer>();
1308 fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
1310 _return_fn = std::move(fn);
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  // --- Output operand indices -----------------------------------------------
  const auto scratch_buffer_index{
      node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
  const auto output_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
  const auto cell_state_out_index{
      node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
  const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};

  // --- Input operand indices (several are optional, see notes below) --------
  const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
  const auto input_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
  const auto input_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
  const auto input_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
  const auto input_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
  const auto recurrent_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
  const auto recurrent_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
  const auto recurrent_to_cell_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
  const auto recurrent_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
  const auto cell_to_input_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
  const auto cell_to_forget_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
  const auto cell_to_output_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
  const auto input_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
  const auto forget_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
  const auto cell_gate_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
  const auto output_gate_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
  const auto projection_weights_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
  const auto projection_bias_index{
      node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
  const auto output_state_in_index{
      node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
  const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
  const auto time_major = node.param().time_major;

  // --- Optionality flags -----------------------------------------------------
  // An optional operand is treated as "present" only if it exists in the
  // operand context AND has a non-empty shape (dim(0)/dim(1) != 0).
  // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
  // has_input_to_input_weights && has_recurrent_to_input_weights: no CIFG
  // !(has_input_to_input_weights && has_recurrent_to_input_weights): CIFG
  // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
  bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
                                    (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
                                     _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
  bool has_recurrent_to_input_weights =
      _ctx.exist(recurrent_to_input_weights_index) &&
      (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
       _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);

  // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
  // But the cell_to_input_weights does not exist in regular CIFG although peephole.
  // has_cell_to_forget_weights && has_cell_to_output_weights: peephole
  // !(has_cell_to_forget_weights && has_cell_to_output_weights): no peephole
  bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
                                    _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
  bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
                                    _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;

  bool has_input_gate_bias =
      _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0);

  bool has_projection_weights = _ctx.exist(projection_weights_index) &&
                                (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
                                 _ctx.at(projection_weights_index).shape().dim(1) != 0);
  bool has_projection_bias =
      _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0);

  // --- Tensor lookups (nullptr for absent optional operands) -----------------
  auto scratch_buffer_tensor = _ctx.exist(scratch_buffer_index)
                                   ? _tensor_reg->getPortableTensor(scratch_buffer_index)
                                   : nullptr; // optional
  auto output_state_out_tensor = _ctx.exist(output_state_out_index)
                                     ? _tensor_reg->getPortableTensor(output_state_out_index)
                                     : nullptr; // optional
  auto cell_state_out_tensor = _ctx.exist(cell_state_out_index)
                                   ? _tensor_reg->getPortableTensor(cell_state_out_index)
                                   : nullptr; // optional
  auto output_tensor = _tensor_reg->getPortableTensor(output_index);

  auto input_tensor = _tensor_reg->getPortableTensor(input_index);

  auto input_to_input_weights_tensor =
      has_input_to_input_weights ? _tensor_reg->getPortableTensor(input_to_input_weights_index)
                                 : nullptr; // optional
  auto input_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_forget_weights_index);
  auto input_to_cell_weights_tensor = _tensor_reg->getPortableTensor(input_to_cell_weights_index);
  auto input_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(input_to_output_weights_index);
  auto recurrent_to_input_weights_tensor =
      has_recurrent_to_input_weights
          ? _tensor_reg->getPortableTensor(recurrent_to_input_weights_index)
          : nullptr; // optional
  auto recurrent_to_forget_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_forget_weights_index);
  auto recurrent_to_cell_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_cell_weights_index);
  auto recurrent_to_output_weights_tensor =
      _tensor_reg->getPortableTensor(recurrent_to_output_weights_index);

  auto cell_to_input_weights_tensor = _tensor_reg->getPortableTensor(cell_to_input_weights_index);
  auto cell_to_forget_weights_tensor =
      has_cell_to_forget_weights ? _tensor_reg->getPortableTensor(cell_to_forget_weights_index)
                                 : nullptr; // optional
  auto cell_to_output_weights_tensor =
      has_cell_to_output_weights ? _tensor_reg->getPortableTensor(cell_to_output_weights_index)
                                 : nullptr; // optional

  auto input_gate_bias_tensor =
      has_input_gate_bias ? _tensor_reg->getPortableTensor(input_gate_bias_index) : nullptr;
  auto forget_gate_bias_tensor = _tensor_reg->getPortableTensor(forget_gate_bias_index);
  auto cell_gate_bias_tensor = _tensor_reg->getPortableTensor(cell_gate_bias_index);
  auto output_gate_bias_tensor = _tensor_reg->getPortableTensor(output_gate_bias_index);
  auto output_state_in_tensor = _tensor_reg->getPortableTensor(output_state_in_index);
  auto cell_state_in_tensor = _tensor_reg->getPortableTensor(cell_state_in_index);

  auto projection_weights_tensor = has_projection_weights
                                       ? _tensor_reg->getPortableTensor(projection_weights_index)
                                       : nullptr; // optional
  auto projection_bias_tensor = has_projection_bias
                                    ? _tensor_reg->getPortableTensor(projection_bias_index)
                                    : nullptr; // optional

  // --- Layer normalization weights (only in the 24-input variant) ------------
  IPortableTensor *input_layer_norm_weights_tensor = nullptr;
  IPortableTensor *forget_layer_norm_weights_tensor = nullptr;
  IPortableTensor *cell_layer_norm_weights_tensor = nullptr;
  IPortableTensor *output_layer_norm_weights_tensor = nullptr;
  if (node.getInputs().size() == 24)
  {
    const auto input_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)};
    const auto forget_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)};
    const auto cell_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)};
    const auto output_layer_norm_weights_index{
        node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS)};

    input_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(input_layer_norm_weights_index);
    forget_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(forget_layer_norm_weights_index);
    cell_layer_norm_weights_tensor = _tensor_reg->getPortableTensor(cell_layer_norm_weights_index);
    output_layer_norm_weights_tensor =
        _tensor_reg->getPortableTensor(output_layer_norm_weights_index);
  }

  // --- Kernel construction ----------------------------------------------------
  // Aux-input arguments are unused by this backend and passed as nullptr.
  auto fn = std::make_unique<ops::LSTMLayer>();

  fn->LSTMLayer::configure(
      input_tensor, input_to_input_weights_tensor, input_to_forget_weights_tensor,
      input_to_cell_weights_tensor, input_to_output_weights_tensor,
      recurrent_to_input_weights_tensor, recurrent_to_forget_weights_tensor,
      recurrent_to_cell_weights_tensor, recurrent_to_output_weights_tensor,
      cell_to_input_weights_tensor, cell_to_forget_weights_tensor, cell_to_output_weights_tensor,
      input_layer_norm_weights_tensor, forget_layer_norm_weights_tensor,
      cell_layer_norm_weights_tensor, output_layer_norm_weights_tensor,
      /*aux_input=*/nullptr,
      /*aux_input_to_input_weights=*/nullptr,
      /*aux_input_to_forget_weights=*/nullptr,
      /*aux_input_to_cell_weights=*/nullptr,
      /*aux_input_to_output_weights=*/nullptr, input_gate_bias_tensor, forget_gate_bias_tensor,
      cell_gate_bias_tensor, output_gate_bias_tensor, projection_weights_tensor,
      projection_bias_tensor, output_state_in_tensor, cell_state_in_tensor, node.param(),
      /*forward_sequence=*/true, time_major,
      /*output_offset=*/0, scratch_buffer_tensor, output_state_out_tensor, cell_state_out_tensor,
      output_tensor,
      !_ctx.at(output_state_in_index).info().isVariable() /* means empty buffer on frontend now */,
      !_ctx.at(cell_state_in_index).info().isVariable());

  _return_fn = std::move(fn);
}
1498 } // namespace backend
1499 } // namespace onert