From 80de8eeec580480e218713dbbc745377934b8ca9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ivan=20Vagin/AI=20Tools=20Lab=20/SRR/Engineer/=EC=82=BC?= =?utf8?q?=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Wed, 10 Jul 2019 08:22:15 +0300 Subject: [PATCH] [neurun] ACL CL kernel generation functionality moved into KernelGenerator (#5584) ACL CL kernel generation functionality moved from StageGenerator into KernelGenerator Signed-off-by: Ivan Vagin --- runtimes/neurun/backend/acl_cl/KernelGenerator.cc | 2174 +++++++++++++- runtimes/neurun/backend/acl_cl/StageGenerator.cc | 3273 +-------------------- 2 files changed, 2174 insertions(+), 3273 deletions(-) diff --git a/runtimes/neurun/backend/acl_cl/KernelGenerator.cc b/runtimes/neurun/backend/acl_cl/KernelGenerator.cc index b125612..45ab283 100644 --- a/runtimes/neurun/backend/acl_cl/KernelGenerator.cc +++ b/runtimes/neurun/backend/acl_cl/KernelGenerator.cc @@ -46,6 +46,102 @@ namespace acl_cl using ::neurun::backend::acl_common::asAclFunction; // +// ActivationBuilder +// +class ActivationBuilder +{ +public: + explicit ActivationBuilder(IExecutionBuilder &builder) : _builder(builder) + { + // DO NOTHING + } + +private: + void appendReLU(::arm_compute::ICLTensor *ifm_alloc); + void appendReLU1(::arm_compute::ICLTensor *ifm_alloc); + void appendReLU6(::arm_compute::ICLTensor *ifm_alloc); + +public: + void append(model::Activation code, ::arm_compute::ICLTensor *ifm_alloc); + +private: + IExecutionBuilder &_builder; +}; + +void ActivationBuilder::appendReLU(::arm_compute::ICLTensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::appendReLU1(::arm_compute::ICLTensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::appendReLU6(::arm_compute::ICLTensor *ifm_alloc) +{ + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc, nullptr, act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _builder.append(std::move(acl_fn)); +} + +void ActivationBuilder::append(model::Activation code, ::arm_compute::ICLTensor *ifm_alloc) +{ + switch (code) + { + case model::Activation::NONE: + { + // DO NOTHING + break; + } + case model::Activation::RELU: + { + appendReLU(ifm_alloc); + break; + } + case model::Activation::RELU1: + { + appendReLU1(ifm_alloc); + break; + } + case model::Activation::RELU6: + { + appendReLU6(ifm_alloc); + break; + } + default: + { + throw std::runtime_error("Not supported, yet"); + } + } +} + +// // KernelGenerator // KernelGenerator::KernelGenerator(const neurun::model::Operands &ctx, @@ -55,115 +151,2083 @@ KernelGenerator::KernelGenerator(const neurun::model::Operands &ctx, // DO NOTHING } -void KernelGenerator::visit(const model::operation::CastNode & /*node*/) {} +void 
KernelGenerator::visit(const model::operation::CastNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::CastNode::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::Conv2DNode &node) +{ + using model::operation::Conv2DNode; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + const auto conv_info = acl_common::asPadStrideInfo(padding, stride); + const auto act_info = acl_common::asActivationLayerInfo(activation); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLConvolutionLayer>(); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), + conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode &node) +{ + using model::operation::DepthwiseConv2DNode; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(DepthwiseConv2DNode::Input::INPUT)}; + const auto ker_index{node.getInputs().at(DepthwiseConv2DNode::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(DepthwiseConv2DNode::Input::BIAS)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
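+  // That is, dim(1) holds the kernel height and dim(2) the kernel width used for the padding
+  // calculation below.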
+ const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto stride = node.param().stride; + const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, + stride, ker_width, ker_height); + const auto multiplier = node.param().multiplier; + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + + const auto conv_info = acl_common::asPadStrideInfo(padding, stride); + // TODO Use `activation` instead of `model::Activation::NONE`. See below. + const auto act_info = acl_common::asActivationLayerInfo(model::Activation::NONE); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); + + fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(), + conv_info, multiplier, act_info); + + _execution_builder->append(asAclFunction(std::move(fn))); + + // TODO Use fused activation instead of separate layer after switching to ACL version >= v19.05. + // Prior versions had a bug due to which the fused activation did not apply in some cases. + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::MaxPool2DNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl; + VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl; + VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl; + VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl; + VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, + ::arm_compute::Size2D{kw, kh}, + acl_common::asPadStrideInfo(padding, stride)}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append((std::move(acl_fn))); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::AvgPool2DNode &node) +{ + const auto 
ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; + VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; + VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl; + VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl; + VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl; + VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl; + VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl; + VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl; + VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl; + VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh}, + acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append((std::move(acl_fn))); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::ConcatNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto axis_index{node.param().axis_index}; + + std::vector input_indexes; + + for (const auto &input : node.getInputs()) + input_indexes.emplace_back(input); + + const auto axis = _ctx.at(axis_index).asScalar(); + + // If tensor allocator allocate as subtensor + bool canEliminate = true; + for (auto &ifm_ind : input_indexes) + { + if (!_tensor_builder->isSubTensorOf(ofm_index, ifm_ind)) + { + canEliminate = false; + break; + } + } + if (canEliminate) + { + // If concat eliminated, return a NOP IFunction + _execution_builder->append(nnfw::cpp14::make_unique()); + return; + } + + auto output_alloc = _tensor_builder->at(ofm_index).get(); + + std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> input_allocs; + for (auto &ifm_ind : input_indexes) + input_allocs.emplace_back(_tensor_builder->at(ifm_ind).get()); + + auto fn = nnfw::cpp14::make_unique<::neurun::backend::acl_cl::kernel::ConcatLayer>(); + + fn->configure(input_allocs, axis, output_alloc); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::FullyConnectedNode &node) +{ + using model::operation::FullyConnectedNode; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)}; + const auto 
bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)}; + + const auto input_rank = _ctx.at(input_index).shape().rank(); + // TODO Currently we are not handling where the case is that the input's rank is 3. + // The handling should be added in the future. + assert(input_rank != 3); + + const auto output_size = _ctx.at(output_index).shape().dim(1); + UNUSED_RELEASE(output_size); + assert(_ctx.at(bias_index).shape().dim(0) == output_size); + assert(_ctx.at(weight_index).shape().dim(0) == output_size); + const auto batch_size = _ctx.at(output_index).shape().dim(0); + const auto input_size = _ctx.at(weight_index).shape().dim(1); + + // Check for reshaping input's shape into rank-2 + bool needs_reshape = false; + neurun::model::Shape reshape(2); + if (input_rank == 4) + { + // TODO Support NCHW frontend + model::FeatureShape ifm_shape_feature = _ctx.at(input_index).shape().asFeature(); + auto feature_size = + ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W; + + UNUSED_RELEASE(feature_size); + assert(feature_size == batch_size * input_size); + + // for reshaping + needs_reshape = true; + reshape.dim(0) = batch_size; /* H */ + reshape.dim(1) = input_size; /* W */ + } + + const auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + auto weight_alloc = _tensor_builder->at(weight_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto acl_layout = output_alloc->handle()->info()->data_layout(); + + auto fn = nnfw::cpp14::make_unique(); + + fn->configure(input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), + output_alloc->handle(), needs_reshape, + ::neurun::backend::acl_common::asTensorShape( + reshape, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout))); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, output_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::MulNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(model::operation::MulNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::MulNode::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>(); + + l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::ReduceSumNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ReduceSumNode::Input::INPUT)}; + const auto axis_index{node.param().axis_index}; + + std::vector axes; + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().num_elements(); + const auto 
input_rank = _ctx.at(input_index).shape().rank(); + + // The axis's data must exist as constant values + assert(axis_base != nullptr); + for (size_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast(axis_base) + n); + if (axis_value < 0) + { + axis_value += input_rank; + } + axes.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, axis_value).value()); + } + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + std::set axes_set; + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto acl_layout = input_alloc->handle()->info()->data_layout(); + // CWHN -> WHCN + uint32_t permutation[4] = {2, 0, 1, 3}; + for (size_t i = 0; i < axes.size(); ++i) + { + if (acl_layout == ::arm_compute::DataLayout::NCHW && input_rank == 4) + { + axes_set.insert(permutation[axes[i]]); + } + else + { + axes_set.insert(axes[i]); + } + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), axes_set, + ::arm_compute::ReduceOperation::SUM); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::ReshapeNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + // NOTE This operation must not be changed the layout from frontend to backend + // However, this runtime can be change the layout of this operation from NHWC to NCHW now + // TODO Change the layout of frontend and backend to be the same and layer to CLReshapeLayer + auto fn = nnfw::cpp14::make_unique<::arm_compute::misc::GenericReshapeLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle()); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::SqueezeNode &node) +{ + // Squeeze is identical to reshape except that it has an optional dimensions input. 
+ // In addition, optional dims_index is ignored since output tensor already has squeezed shape + // by freezer and toco + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::SqueezeNode::Input::INPUT)}; + const auto dims_index{node.param().dims}; + (void)dims_index; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + auto fn = nnfw::cpp14::make_unique(); + fn->configure(input_alloc->handle(), output_alloc->handle()); + auto acl_fn = asAclFunction(std::move(fn)); + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::TanhNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::TanhNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::SoftmaxNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)}; + + const auto beta = node.param().beta; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>(); + + fn->configure(input_alloc->handle(), output_alloc->handle(), beta); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::StridedSliceNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::StridedSliceNode::Input::INPUT)}; + const auto startData_index{node.param().startData_index}; + const auto endData_index{node.param().endData_index}; + const auto stridesData_index{node.param().stridesData_index}; + const auto beginMask_index{node.param().beginMask_index}; + const auto endMask_index{node.param().endMask_index}; + const auto shrinkAxisMask_index{node.param().shrinkAxisMask_index}; + + // Set initializers for indices data such as order of inputData + int input_rank = _ctx.at(input_index).shape().rank(); + std::vector starts; + std::vector ends; + std::vector strides; + starts.resize(input_rank, 0); + ends.resize(input_rank, 0); + strides.resize(input_rank, 0); + { + auto input_shape = _ctx.at(input_index).shape(); + auto startData_base = _ctx.at(startData_index).data().base(); + auto endData_base = _ctx.at(endData_index).data().base(); + auto stridesData_base = _ctx.at(stridesData_index).data().base(); + const int startData_size = _ctx.at(startData_index).shape().num_elements(); + const int endData_size = _ctx.at(endData_index).shape().num_elements(); + const int stridesData_size = _ctx.at(stridesData_index).shape().num_elements(); + + using neurun::model::DataType; + + UNUSED_RELEASE(startData_size); + UNUSED_RELEASE(endData_size); + UNUSED_RELEASE(stridesData_size); + + assert(_ctx.at(startData_index).typeInfo().type() == 
DataType::INT32); + assert(_ctx.at(endData_index).typeInfo().type() == DataType::INT32); + assert(_ctx.at(stridesData_index).typeInfo().type() == DataType::INT32); + assert(startData_size == input_rank); + assert(endData_size == input_rank); + assert(stridesData_size == input_rank); + + assert(startData_base != nullptr); + for (int n = 0; n < input_rank; ++n) + { + auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n).value(); + + int32_t start_value = *(reinterpret_cast(startData_base) + n); + starts[axis] = start_value; + + int32_t end_value = *(reinterpret_cast(endData_base) + n); + ends[axis] = end_value; + + int32_t strides_value = *(reinterpret_cast(stridesData_base) + n); + strides[axis] = strides_value; + } + } + + // Set mask bits such as order of inputData + const auto beginMask = ::neurun::backend::acl_common::ReorderBits( + _ctx.at(beginMask_index).asScalar(), input_rank); + const auto endMask = ::neurun::backend::acl_common::ReorderBits( + _ctx.at(endMask_index).asScalar(), input_rank); + const auto shrinkAxisMask = ::neurun::backend::acl_common::ReorderBits( + _ctx.at(shrinkAxisMask_index).asScalar(), input_rank); + + auto outputData_alloc = _tensor_builder->at(output_index).get(); + auto inputData_alloc = _tensor_builder->at(input_index).get(); + + ::arm_compute::Coordinates starts_set; + ::arm_compute::Coordinates ends_set; + ::arm_compute::BiStrides strides_set; + + for (size_t i = 0; i < starts.size(); ++i) + { + starts_set.set(i, starts[i]); + ends_set.set(i, ends[i]); + strides_set.set(i, strides[i]); + } + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLStridedSlice>(); + + l->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set, + strides_set, beginMask, endMask, shrinkAxisMask); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::TransposeNode &node) +{ + const auto ofm_idx{node.getOutputs().at(0)}; + const auto ifm_idx{node.getInputs().at(model::operation::TransposeNode::Input::INPUT)}; + const auto perm{node.param().perm}; + + const auto rank = _ctx.at(ifm_idx).shape().rank(); + std::vector pv; + const auto perm_base = _ctx.at(perm).data().base(); + const int perm_size = _ctx.at(perm).shape().num_elements(); + + assert(perm_base != nullptr); + for (int32_t n = 0; n < perm_size; ++n) + { + int32_t perm_value = *(reinterpret_cast(perm_base) + n); + assert(static_cast(perm_value) < rank); + pv.emplace_back(perm_value); + } + + auto ofm_alloc = _tensor_builder->at(ofm_idx).get(); + auto ifm_alloc = _tensor_builder->at(ifm_idx).get(); + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto acl_layout = ifm_alloc->handle()->info()->data_layout(); + // Reversed + auto backend_pv = ::neurun::backend::acl_common::getARMComputePermutationVector(rank, pv); + if (acl_layout == ::arm_compute::DataLayout::NCHW && rank == 4) + { + // CWHN -> WHCN + // C : 0 -> 2, W : 1 -> 0, H : 2 -> 1, N : 3 -> 3 + ::arm_compute::PermutationVector cwhn_to_whcn_pv; + uint32_t axis[4] = {2, 0, 1, 3}; + for (size_t i = 0; i < pv.size(); ++i) + { + cwhn_to_whcn_pv.set(axis[i], axis[backend_pv[i]]); + } + backend_pv = cwhn_to_whcn_pv; + } + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLPermute>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), 
backend_pv); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::AddNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>(); + + l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + arm_compute::ConvertPolicy::SATURATE); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::SubNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(model::operation::SubNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::SubNode::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtraction>(); + + l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), + arm_compute::ConvertPolicy::SATURATE); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); -void KernelGenerator::visit(const model::operation::Conv2DNode & /*node*/) {} + _execution_builder->append(std::move(acl_fn)); -void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode & /*node*/) {} + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::DivNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(model::operation::DivNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::DivNode::Input::RHS)}; + + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticDivision>(); + + l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::ExpNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ExpNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); -void 
KernelGenerator::visit(const model::operation::MaxPool2DNode & /*node*/) {} + std::unique_ptr<::arm_compute::IFunction> fn; -void KernelGenerator::visit(const model::operation::AvgPool2DNode & /*node*/) {} + auto l = nnfw::cpp14::make_unique<::arm_compute::CLExpLayer>(); -void KernelGenerator::visit(const model::operation::ConcatNode & /*node*/) {} + l->configure(input_alloc->handle(), output_alloc->handle()); -void KernelGenerator::visit(const model::operation::FullyConnectedNode & /*node*/) {} + fn = std::move(l); -void KernelGenerator::visit(const model::operation::MulNode & /*node*/) {} + auto acl_fn = asAclFunction(std::move(fn)); -void KernelGenerator::visit(const model::operation::ReduceSumNode & /*node*/) {} + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::LogisticNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::LogisticNode::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); -void KernelGenerator::visit(const model::operation::ReshapeNode & /*node*/) {} + _execution_builder->append(std::move(acl_fn)); +} -void KernelGenerator::visit(const model::operation::SqueezeNode & /*node*/) {} +void KernelGenerator::visit(const model::operation::LogicalAndNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT1)}; -void KernelGenerator::visit(const model::operation::TanhNode & /*node*/) {} + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = _tensor_builder->at(input1_index).get(); -void KernelGenerator::visit(const model::operation::SoftmaxNode & /*node*/) {} + std::unique_ptr<::arm_compute::IFunction> fn; -void KernelGenerator::visit(const model::operation::StridedSliceNode & /*node*/) {} + auto l = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); -void KernelGenerator::visit(const model::operation::TransposeNode & /*node*/) {} + l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + ::arm_compute::BinaryLogicalOperation::AND); -void KernelGenerator::visit(const model::operation::AddNode & /*node*/) {} + fn = std::move(l); -void KernelGenerator::visit(const model::operation::SubNode & /*node*/) {} + auto acl_fn = asAclFunction(std::move(fn)); -void KernelGenerator::visit(const model::operation::DivNode & /*node*/) {} + _execution_builder->append(std::move(acl_fn)); +} -void KernelGenerator::visit(const model::operation::ExpNode & /*node*/) {} +void KernelGenerator::visit(const model::operation::LSTMNode &node) +{ + // TODO Support dynamic rnn + // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
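+  // The node exposes four outputs (scratch buffer, output state, cell state and output) and a
+  // long list of inputs, several of which are optional; the optional weights are detected further
+  // below by checking for non-empty shapes.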
+ const auto scratch_buffer_index{ + node.getOutputs().at(model::operation::LSTMNode::Output::SCRATCH_BUFFER)}; + const auto output_state_out_index{ + node.getOutputs().at(model::operation::LSTMNode::Output::OUTPUT_STATE_OUT)}; + const auto cell_state_out_index{ + node.getOutputs().at(model::operation::LSTMNode::Output::CELL_STATE_OUT)}; + const auto output_index{node.getOutputs().at(model::operation::LSTMNode::Output::OUTPUT)}; -void KernelGenerator::visit(const model::operation::LogisticNode & /*node*/) {} + const auto input_index{node.getInputs().at(model::operation::LSTMNode::Input::INPUT)}; + const auto input_to_input_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional + const auto input_to_forget_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_FORGET_WEIGHTS)}; + const auto input_to_cell_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_CELL_WEIGHTS)}; + const auto input_to_output_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_OUTPUT_WEIGHTS)}; + const auto recurrent_to_input_weights_index{node.getInputs().at( + model::operation::LSTMNode::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional + const auto recurrent_to_forget_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::RECURRENT_TO_FORGET_WEIGHTS)}; + const auto recurrent_to_cell_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::RECURRENT_TO_CELL_WEIGHTS)}; + const auto recurrent_to_output_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; + const auto cell_to_input_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::CELL_TO_INPUT_WEIGHTS)}; // optional + const auto cell_to_forget_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::CELL_TO_FORGET_WEIGHTS)}; // optional + const auto cell_to_output_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional + const auto input_gate_bias_index{ + node.getInputs().at(model::operation::LSTMNode::Input::INPUT_GATE_BIAS)}; + const auto forget_gate_bias_index{ + node.getInputs().at(model::operation::LSTMNode::Input::FORGET_GATE_BIAS)}; + const auto cell_bias_index{node.getInputs().at(model::operation::LSTMNode::Input::CELL_BIAS)}; + const auto output_gate_bias_index{ + node.getInputs().at(model::operation::LSTMNode::Input::OUTPUT_GATE_BIAS)}; + const auto projection_weights_index{ + node.getInputs().at(model::operation::LSTMNode::Input::PROJECTION_WEIGHTS)}; // optional + const auto projection_bias_index{ + node.getInputs().at(model::operation::LSTMNode::Input::PROJECTION_BIAS)}; // optional + const auto output_state_in_index{ + node.getInputs().at(model::operation::LSTMNode::Input::OUTPUT_STATE_IN)}; + const auto cell_state_in_index{ + node.getInputs().at(model::operation::LSTMNode::Input::CELL_STATE_IN)}; + const auto cell_threshold = node.param().cell_threshold; + const auto projection_threshold = node.param().projection_threshold; -void KernelGenerator::visit(const model::operation::LogicalAndNode & /*node*/) {} + bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; + bool has_recurrent_to_input_weights = + _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && + _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; + bool 
has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; + bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; + bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && + _ctx.at(projection_weights_index).shape().dim(1) != 0; + bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); -void KernelGenerator::visit(const model::operation::LSTMNode & /*node*/) {} + // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. + // true: no CIFG + // false: CIFG + // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). + bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; -void KernelGenerator::visit(const model::operation::ReduceMaxNode & /*node*/) {} + // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. + // But the cell_to_input_weights does not exist in regular CIFG although peephole. + // true: peephole + // false: no peephole + bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; -void KernelGenerator::visit(const model::operation::ComparisonNode & /*node*/) {} + // NOTE Although the projection weights has data the projection bias may not have data. + bool has_projection_param = has_projection_weights; -void KernelGenerator::visit(const model::operation::RSQRTNode & /*node*/) {} + const auto activation = node.param().activation; + const auto cell_clip = cell_threshold; + const auto projection_clip = projection_threshold; + assert(cell_clip >= 0.f && projection_clip >= 0.f); -void KernelGenerator::visit(const model::operation::ReLUNode & /*node*/) {} + auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get(); + auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get(); + auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get(); + auto output_alloc = _tensor_builder->at(output_index).get(); -void KernelGenerator::visit(const model::operation::ResizeBilinearNode & /*node*/) {} + auto input_alloc = _tensor_builder->at(input_index).get(); -void KernelGenerator::visit(const model::operation::ReLU1Node & /*node*/) {} + auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get(); + auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get(); + auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get(); + auto recurrent_to_forget_weights_alloc = + _tensor_builder->at(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get(); + auto recurrent_to_output_weights_alloc = + _tensor_builder->at(recurrent_to_output_weights_index).get(); -void KernelGenerator::visit(const model::operation::ReLU6Node & /*node*/) {} + auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get(); + auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get(); + auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get(); + auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get(); + auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get(); -void KernelGenerator::visit(const model::operation::RNNNode & /*node*/) {} + auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation); -void 
KernelGenerator::visit(const model::operation::FloorNode & /*node*/) {} + std::unique_ptr<::arm_compute::IFunction> fn; -void KernelGenerator::visit(const model::operation::SpaceToDepthNode & /*node*/) {} + auto l = nnfw::cpp14::make_unique<::arm_compute::CLLSTMLayer>(); -void KernelGenerator::visit(const model::operation::L2Pool2DNode & /*node*/) {} + ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{}; + if (has_cifg_param) + { + auto input_to_input_weights_alloc = + _tensor_builder->at(input_to_input_weights_index).get(); // optional + auto recurrent_to_input_weights_alloc = + _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + auto cell_to_input_weights_handle = + has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle() + : nullptr; // optional (non-cifg && peephole) + auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional + lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), + recurrent_to_input_weights_alloc->handle(), + cell_to_input_weights_handle, input_gate_bias_alloc->handle()); + } + if (has_peephole_param) + { + auto cell_to_forget_weights_alloc = + _tensor_builder->at(cell_to_forget_weights_index).get(); // optional + auto cell_to_output_weights_alloc = + _tensor_builder->at(cell_to_output_weights_index).get(); // optional + lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), + cell_to_output_weights_alloc->handle()); + } + if (has_projection_param) + { + auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional + auto projection_bias_handle = has_projection_bias + ? _tensor_builder->at(projection_bias_index).get()->handle() + : nullptr; // optional + lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); + } -void KernelGenerator::visit(const model::operation::EmbeddingLookupNode & /*node*/) {} + l->configure( + input_alloc->handle(), input_to_forget_weights_alloc->handle(), + input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), + recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(), + recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), + cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(), + cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(), + output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(), + lstm_params, act_info, cell_clip, projection_clip); -void KernelGenerator::visit(const model::operation::L2NormalizationNode & /*node*/) {} + fn = std::move(l); -void KernelGenerator::visit(const model::operation::HashtableLookupNode & /*node*/) {} + auto acl_fn = asAclFunction(std::move(fn)); -void KernelGenerator::visit(const model::operation::PReLUNode & /*node*/) {} + _execution_builder->append(std::move(acl_fn)); +} -void KernelGenerator::visit(const model::operation::TransposeConvNode & /*node*/) {} +void KernelGenerator::visit(const model::operation::ReduceMaxNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ReduceMaxNode::Input::INPUT)}; + const auto axis_index{node.param().axis_index}; -void KernelGenerator::visit(const model::operation::SQRTNode & /*node*/) {} + auto input_shape = _ctx.at(input_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); -void KernelGenerator::visit(const 
model::operation::LogicalOrNode & /*node*/) {} + std::vector axis; + { + const auto ifm_rank = input_shape.rank(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + int32_t axis_value = _ctx.at(axis_index).asScalar(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const int axis_size = axis_shape.num_elements(); -void KernelGenerator::visit(const model::operation::LogicalNotNode & /*node*/) {} + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. + assert(axis_base != nullptr); + for (int32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + break; + } + } -void KernelGenerator::visit(const model::operation::SquaredDifferenceNode & /*node*/) {} + const auto input_rank = input_shape.rank(); -void KernelGenerator::visit(const model::operation::TopKV2Node & /*node*/) {} + auto ofm_alloc = _tensor_builder->at(output_index).get(); + auto ifm_alloc = _tensor_builder->at(input_index).get(); + std::set axes; + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto acl_layout = ifm_alloc->handle()->info()->data_layout(); + // CWHN -> WHCN + uint32_t permutation[4] = {2, 0, 1, 3}; + for (size_t i = 0; i < axis.size(); ++i) + { + if (acl_layout == ::arm_compute::DataLayout::NCHW && input_rank == 4) + { + axes.insert(permutation[axis[i]]); + } + else + { + axes.insert(axis[i]); + } + } -void KernelGenerator::visit(const model::operation::GatherNode & /*node*/) {} + std::unique_ptr<::arm_compute::IFunction> fn; -void KernelGenerator::visit(const model::operation::NegNode & /*node*/) {} + auto l = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); -void KernelGenerator::visit(const model::operation::AbsNode & /*node*/) {} + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), axes, arm_compute::ReduceOperation::MAX); -void KernelGenerator::visit(const model::operation::ArgMaxNode & /*node*/) {} + fn = std::move(l); -void KernelGenerator::visit(const model::operation::DequantizeNode & /*node*/) {} + auto acl_fn = asAclFunction(std::move(fn)); -void KernelGenerator::visit(const model::operation::MeanNode & /*node*/) {} + _execution_builder->append(std::move(acl_fn)); +} -void KernelGenerator::visit(const model::operation::LocalResponseNormalizationNode & /*node*/) {} +void KernelGenerator::visit(const model::operation::ComparisonNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(model::operation::ComparisonNode::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(model::operation::ComparisonNode::Input::INPUT1)}; -void KernelGenerator::visit(const model::operation::DepthToSpaceNode & /*node*/) {} + const auto comparison_type = node.param().comparison_type; -void KernelGenerator::visit(const model::operation::ReduceMinNode & /*node*/) {} + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = 
_tensor_builder->at(input1_index).get(); -void KernelGenerator::visit(const model::operation::SplitNode & /*node*/) {} + std::unique_ptr<::arm_compute::IFunction> fn; -void KernelGenerator::visit(const model::operation::UnpackNode & /*node*/) {} + auto l = nnfw::cpp14::make_unique<::arm_compute::CLComparison>(); -void KernelGenerator::visit(const model::operation::PadNode & /*node*/) {} + l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), + (arm_compute::ComparisonOperation)comparison_type); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::RSQRTNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::LogisticNode::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRsqrtLayer>(); + + fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const model::operation::ReLUNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::ReLUNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + auto fn = nnfw::cpp14::make_unique(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + + fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::ResizeBilinearNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(model::operation::ResizeBilinearNode::Input::INPUT)}; + const auto height_index{node.param().height_index}; + const auto width_index{node.param().width_index}; + (void)height_index; + (void)width_index; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLScale>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), + ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, + ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::ReLU1Node &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::ReLU1Node::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + fn = std::move(l); + + auto acl_fn = 
asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::ReLU6Node &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::ReLU6Node::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::RNNNode &node) +{ + const auto output_index{node.getOutputs().at(model::operation::RNNNode::Output::OUTPUT)}; + const auto hidden_state_out_index{ + node.getOutputs().at(model::operation::RNNNode::Output::HIDDEN_STATE_OUT)}; + + const auto input_index{node.getInputs().at(model::operation::RNNNode::Input::INPUT)}; + const auto weights_index{node.getInputs().at(model::operation::RNNNode::Input::WEIGHTS)}; + const auto recurrent_weights_index{ + node.getInputs().at(model::operation::RNNNode::Input::RECURRENT_WEIGHTS)}; + const auto bias_index{node.getInputs().at(model::operation::RNNNode::Input::BIAS)}; + const auto hidden_state_in_index{ + node.getInputs().at(model::operation::RNNNode::Input::HIDDEN_STATE_IN)}; + + const auto activation = node.param().activation; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get(); + + auto input_alloc = _tensor_builder->at(input_index).get(); + auto weights_alloc = _tensor_builder->at(weights_index).get(); + auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get(); + auto bias_alloc = _tensor_builder->at(bias_index).get(); + auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get(); + auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation); + + auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::CLCopy>(); + copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle()); + _execution_builder->append(asAclFunction(std::move(copy_layer))); + + std::unique_ptr<::arm_compute::IFunction> fn; + auto rnn_layer = nnfw::cpp14::make_unique<::arm_compute::CLRNNLayerEx>(); + rnn_layer->configure(input_alloc->handle(), weights_alloc->handle(), + recurrent_weights_alloc->handle(), bias_alloc->handle(), + hidden_state_out_alloc->handle(), output_alloc->handle(), act_info); + fn = std::move(rnn_layer); + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const model::operation::FloorNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::FloorNode::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLFloor>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); 
+} + +void KernelGenerator::visit(const model::operation::SpaceToDepthNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::SpaceToDepthNode::Input::INPUT)}; + const auto block_size_index{node.param().block_size_index}; + + auto block_size = _ctx.at(block_size_index).asScalar(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::L2Pool2DNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::L2Pool2DNode::Input::INPUT)}; + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + + uint32_t kw = node.param().kw; + uint32_t kh = node.param().kh; + const auto stride = node.param().stride; + const auto padding = + neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + ::arm_compute::PoolingLayerInfo info{ + ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh}, + ::neurun::backend::acl_common::asPadStrideInfo(padding, stride)}; + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); + + ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle()); +} + +void KernelGenerator::visit(const model::operation::EmbeddingLookupNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{ + node.getInputs().at(model::operation::EmbeddingLookupNode::Input::LOOKUPS)}; + const auto values_index{ + node.getInputs().at(model::operation::EmbeddingLookupNode::Input::VALUES)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>(); + + l->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::L2NormalizationNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::L2NormalizationNode::Input::INPUT)}; + + // {CL|Neon}L2Normalization performs the reduction only along dimension 0 + // L2 Normalization always performs the reduction along the depth axis + // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by + // choosing normalization parameters as below + + int32_t radius = 2 * 
_ctx.at(ifm_index).shape().dim(3) + 1; // normSize = depth * 2 + 1 + float alpha = 1.0f; // In the implementation to make alpha_ become 1 + float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) + float bias = 0.0f; // Don't offset the reduction. + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, + radius, alpha, beta, bias, false); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::HashtableLookupNode &node) +{ + const auto output_index{ + node.getOutputs().at(model::operation::HashtableLookupNode::Output::OUTPUT)}; + const auto hits_index{node.getOutputs().at(model::operation::HashtableLookupNode::Output::HITS)}; + + const auto lookups_index{ + node.getInputs().at(model::operation::HashtableLookupNode::Input::LOOKUPS)}; + const auto keys_index{node.getInputs().at(model::operation::HashtableLookupNode::Input::KEYS)}; + const auto values_index{ + node.getInputs().at(model::operation::HashtableLookupNode::Input::VALUES)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto hits_alloc = _tensor_builder->at(hits_index).get(); + + auto lookups_alloc = _tensor_builder->at(lookups_index).get(); + auto keys_alloc = _tensor_builder->at(keys_index).get(); + auto values_alloc = _tensor_builder->at(values_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>(); + + l->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), + output_alloc->handle(), hits_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::PReLUNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::PReLUNode::Input::INPUT)}; + const auto alpha_index{node.getInputs().at(model::operation::PReLUNode::Input::ALPHA)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto alpha_alloc = _tensor_builder->at(alpha_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>(); + + l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::TransposeConvNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto output_shape_index{ + node.getInputs().at(model::operation::TransposeConvNode::Input::OUTPUT_SHAPE)}; + const auto ker_index{node.getInputs().at(model::operation::TransposeConvNode::Input::KERNEL)}; + const auto ifm_index{node.getInputs().at(model::operation::TransposeConvNode::Input::INPUT)}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ker_shape = 
_ctx.at(ker_index).shape().asFeature(); + + const auto stride = node.param().stride; + + assert((node.param().padding.type == model::PaddingType::SAME) || + (node.param().padding.type == model::PaddingType::VALID)); + auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride, + ker_shape.W, ker_shape.H); + + uint32_t invalid_horizontal = 0; + uint32_t invalid_vertical = 0; + if (node.param().padding.type == model::PaddingType::VALID) + { + invalid_horizontal = + ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1); + invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); + } + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto ker_alloc = _tensor_builder->at(ker_index).get(); + + const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>(); + + l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, + invalid_vertical, invalid_horizontal); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::SQRTNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::SQRTNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + l->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::LogicalOrNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input0_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT0)}; + const auto input1_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT1)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input0_alloc = _tensor_builder->at(input0_index).get(); + auto input1_alloc = _tensor_builder->at(input1_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseOr>(); + + l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::LogicalNotNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::LogicalNotNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>(); + + l->configure(input_alloc->handle(), output_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = 
asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::SquaredDifferenceNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(model::operation::SquaredDifferenceNode::Input::LHS)}; + const auto rhs_index{node.getInputs().at(model::operation::SquaredDifferenceNode::Input::RHS)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto lhs_alloc = _tensor_builder->at(lhs_index).get(); + auto rhs_alloc = _tensor_builder->at(rhs_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); + + l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::TopKV2Node &node) +{ + const auto outputValues_index{ + node.getOutputs().at(model::operation::TopKV2Node::Output::OUTPUT_VALUES)}; + const auto outputIndices_index{ + node.getOutputs().at(model::operation::TopKV2Node::Output::OUTPUT_INDICES)}; + + const auto inputData_index{node.getInputs().at(model::operation::TopKV2Node::Input::INPUT)}; + const auto k_index{node.param().k_index}; + + // Currently, we only support rank-1 or rank-2 inputs. + assert(_ctx.at(inputData_index).shape().rank() == 1 || + _ctx.at(inputData_index).shape().rank() == 2); + + const auto k = _ctx.at(k_index).asScalar<int32_t>(); + + auto values_alloc = _tensor_builder->at(outputValues_index).get(); + auto indices_alloc = _tensor_builder->at(outputIndices_index).get(); + auto input_alloc = _tensor_builder->at(inputData_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>(); + + l->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::GatherNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + + const auto ifm_index{node.getInputs().at(model::operation::GatherNode::Input::INPUT)}; + const auto indices_index{node.getInputs().at(model::operation::GatherNode::Input::INDICES)}; + + const auto axis_index{node.param().axis_index}; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + + const auto axis_value = static_cast<int32_t>(_ctx.at(axis_index).asScalar<int32_t>()); + const int axis = + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_shape.rank(), axis_value).value(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + auto indices_alloc = _tensor_builder->at(indices_index).get(); + auto acl_layout = ofm_alloc->handle()->info()->data_layout(); + UNUSED_RELEASE(acl_layout); + + // NOTE The frontend layout and backend layout must be the same for this operation. + // If not the same, we have to add a stage(?) to perform permutation of output tensor. It + // is not efficient even if it works well. Instead, it would be better to set the + // layout of these backend tensors to the same layout. + // There is also one thing we have to think about. This operation depends on the layout of + // a model.
For example, if a model in NHWC has this operation as output rank == 4, indices + // rank == 2 and axis == 2, this operation should work on the axes W and C, but the axes W + // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. + // TODO Remove this workaround + // It is a workaround that sets the layout of these backend tensors to the layout of the + // frontend when creating them + // TODO Support frontend in NCHW + // TODO Change the layout of frontend and backend to be the same + // assert(::arm_compute::DataLayout::NHWC == acl_layout); + assert(acl_layout == ifm_alloc->handle()->info()->data_layout()); + assert(acl_layout == indices_alloc->handle()->info()->data_layout()); + + std::unique_ptr<::arm_compute::IFunction> fn; + // TODO Change to CLGather + auto l = nnfw::cpp14::make_unique<::arm_compute::misc::GenericGather>(); + + l->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::NegNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::NegNode::Input::INPUT)}; + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLNeg>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::AbsNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::AbsNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + l->configure(input_alloc->handle(), output_alloc->handle(), act_info); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::ArgMaxNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::ArgMaxNode::Input::INPUT)}; + const auto axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + + assert(_ctx.at(axis_index).isConstant()); + // Axis dimension is always 1. + assert(axis_shape.rank() == 1); + assert((ifm_shape.rank() - 1) == ofm_shape.rank()); + + std::vector<uint32_t> l_axis; + const int axis_size = axis_shape.num_elements(); + auto axis_base = _ctx.at(axis_index).data().base(); + // TODO Should support axis size > 1. + assert(axis_size == 1); + // axis is a tensor with 1 dimension - always a vector.
+ assert(axis_base != nullptr); + + for (int32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_shape.rank(); + } + l_axis.push_back(acl_common::ToARMComputeAxis(ifm_shape.rank(), axis_value).value()); + } + + const auto ifm_rank = ifm_shape.rank(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto acl_layout = ifm_alloc->handle()->info()->data_layout(); + if (acl_layout == ::arm_compute::DataLayout::NCHW && ifm_rank == 4) + { + // CWHN -> WHCN + uint32_t permutation[4] = {2, 0, 1, 3}; + for (size_t i = 0; i < l_axis.size(); ++i) + l_axis[i] = permutation[l_axis[i]]; + } + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLArgOperation>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), l_axis, ::arm_compute::ArgOperation::MAX); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::DequantizeNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::DequantizeNode::Input::INPUT)}; + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); + + l->configure(input_alloc->handle(), output_alloc->handle()); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::MeanNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::MeanNode::Input::INPUT)}; + + const auto axis_index{node.param().axis_index}; + const auto keep_dims_index{node.param().keep_dims_index}; + (void)keep_dims_index; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + + std::vector<uint32_t> axis; + { + const auto ifm_rank = ifm_shape.rank(); + const auto axis_shape = _ctx.at(axis_index).shape(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + auto axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const int axis_size = axis_shape.num_elements(); + + // If the axis data does not exist as constant values and is given as input data instead, we have + // to find a way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr); + for (int32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + } + } + + const auto ifm_rank = ifm_shape.rank(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + std::set<uint32_t> fixed_axis; + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto acl_layout = ifm_alloc->handle()->info()->data_layout(); + // CWHN -> WHCN + uint32_t permutation[4] = {2, 0, 1, 3}; + for (auto a : axis) + { + if (acl_layout == ::arm_compute::DataLayout::NCHW && ifm_rank == 4) + { + fixed_axis.insert(permutation[a]); + } + else + { + fixed_axis.insert(a); + } + } + + std::unique_ptr<::arm_compute::IFunction> fn; + + // NOTE CLReduceMean has a bug: it does not support the NHWC layout + // CLReduceMean intermediate tensors are always NCHW layout + auto l = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), fixed_axis, + ::arm_compute::ReduceOperation::MEAN); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::LocalResponseNormalizationNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{ + node.getInputs().at(model::operation::LocalResponseNormalizationNode::Input::INPUT)}; + const auto radius_index{node.param().radius_index}; + const auto bias_index{node.param().bias_index}; + const auto alpha_index{node.param().alpha_index}; + const auto beta_index{node.param().beta_index}; + + auto radius = _ctx.at(radius_index).asScalar<int32_t>(); + auto alpha = _ctx.at(alpha_index).asScalar<float>(); + auto beta = _ctx.at(beta_index).asScalar<float>(); + auto bias = _ctx.at(bias_index).asScalar<float>(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + + const auto norm_info = ::arm_compute::NormalizationLayerInfo( + ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::DepthToSpaceNode &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(model::operation::DepthToSpaceNode::Input::INPUT)}; + const auto block_size_index{node.param().block_size_index}; + + auto block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + assert(block_size > 0); + + auto output_alloc = _tensor_builder->at(output_index).get(); + auto input_alloc = _tensor_builder->at(input_index).get(); + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>(); + + l->configure(input_alloc->handle(), output_alloc->handle(), block_size); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void 
KernelGenerator::visit(const model::operation::ReduceMinNode &node) +{ + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(model::operation::ReduceMinNode::Input::INPUT)}; + const auto axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + + std::vector<uint32_t> axis; + { + const auto ifm_rank = ifm_shape.rank(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + auto axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const int axis_size = axis_shape.num_elements(); + + // If the axis data does not exist as constant values and is given as input data instead, we have + // to find a way to infer the output shape when sinking the output. + assert(axis_base != nullptr); + for (int32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.emplace_back( + ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + break; + } + } + + const auto ifm_rank = ifm_shape.rank(); + + auto ofm_alloc = _tensor_builder->at(ofm_index).get(); + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + std::set<uint32_t> fixed_axis; + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto acl_layout = ifm_alloc->handle()->info()->data_layout(); + // CWHN -> WHCN + uint32_t permutation[4] = {2, 0, 1, 3}; + for (auto a : axis) + { + if (acl_layout == ::arm_compute::DataLayout::NCHW && ifm_rank == 4) + { + fixed_axis.insert(permutation[a]); + } + else + { + fixed_axis.insert(a); + } + } + + std::unique_ptr<::arm_compute::IFunction> fn; + + auto l = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + l->configure(ifm_alloc->handle(), ofm_alloc->handle(), fixed_axis, + ::arm_compute::ReduceOperation::MIN); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::SplitNode &node) +{ + const auto ifm_index{node.getInputs().at(model::operation::SplitNode::Input::INPUT)}; + const auto axis_index{node.param().axis_index}; + const auto num_of_splits_index{node.param().num_of_splits_index}; + + assert(_ctx.at(num_of_splits_index).asScalar<int32_t>() == node.getOutputs().size()); + + const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); + + auto axis = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis < 0) + axis += ifm_rank; + axis = acl_common::ToARMComputeAxis(ifm_rank, axis).value(); + + std::vector<model::OperandIndex> output_indexes; + for (const auto &output : node.getOutputs()) + output_indexes.emplace_back(output); + + auto ifm_alloc = _tensor_builder->at(ifm_index).get(); + std::vector<arm_compute::ICLTensor *> output_allocs; + for (const auto &ofm_ind : output_indexes) + output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + + auto fixed_axis = axis; + auto acl_layout = ifm_alloc->handle()->info()->data_layout(); + + if (acl_layout == ::arm_compute::DataLayout::NCHW && ifm_rank == 4) + { + // CWHN -> WHCN + uint32_t permutation[4] = {2, 0, 1, 3}; + fixed_axis = permutation[fixed_axis]; + 
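// The remapping above is needed because, in ACL coordinate order, an NHWC tensor is indexed + // as CWHN while an NCHW tensor is indexed as WHCN, so an axis computed against the frontend + // (NHWC) layout has to be translated for a backend tensor created with the NCHW layout. +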
} + + std::unique_ptr<::arm_compute::IFunction> fn; + + // TODO Support NCHW frontend + // TODO Change the layout of frontend and backend to be the same + auto l = nnfw::cpp14::make_unique<::arm_compute::CLSplit>(); + + l->configure(ifm_alloc->handle(), output_allocs, fixed_axis); + + fn = std::move(l); + + auto acl_fn = asAclFunction(std::move(fn)); + + _execution_builder->append(std::move(acl_fn)); +} + +void KernelGenerator::visit(const model::operation::UnpackNode &node) +{ + const auto input_index{node.getInputs().at(model::operation::UnpackNode::Input::INPUT)}; + auto axis{node.param().axis}; + + const auto input_rank = _ctx.at(input_index).shape().rank(); + + if (axis < 0) + axis += input_rank; + axis = acl_common::ToARMComputeAxis(input_rank, axis).value(); + + std::vector<model::OperandIndex> output_indexes; + for (const auto &output_index : node.getOutputs()) + output_indexes.emplace_back(output_index); + + auto input = _tensor_builder->at(input_index).get()->handle(); + std::vector<arm_compute::ICLTensor *> outputs; + for (const auto &output_index : output_indexes) + outputs.emplace_back(_tensor_builder->at(output_index)->handle()); + + int fixed_axis = axis; + if (input->info()->num_dimensions() == 4 && + input->info()->data_layout() == ::arm_compute::DataLayout::NCHW) + { + // CWHN -> WHCN + const int permutation[4] = {2, 0, 1, 3}; + fixed_axis = permutation[fixed_axis]; + } + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>(); + + fn->configure(input, outputs, fixed_axis); + + _execution_builder->append(asAclFunction(std::move(fn))); +} + +void KernelGenerator::visit(const model::operation::PadNode &node) +{ + const auto input_index{node.getInputs().at(model::operation::PadNode::Input::INPUT)}; + const auto pad_index{node.getInputs().at(model::operation::PadNode::Input::PAD)}; + const auto output_index{node.getOutputs().at(0)}; + assert(_ctx.at(pad_index).isConstant()); + + auto rank = _ctx.at(pad_index).shape().dim(0); + auto pad_base = _ctx.at(pad_index).data().base(); + ::arm_compute::PaddingList padding_list; + padding_list.resize(rank); + for (int32_t n = 0; n < rank; ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); + auto axis = acl_common::ToARMComputeAxis(rank, n).value(); + + padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; + } + + auto input_type = _ctx.at(input_index).typeInfo(); + auto data_type = acl_common::asDataType(input_type.type()); + auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()); + const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info); + + auto input = _tensor_builder->at(input_index).get()->handle(); + auto output = _tensor_builder->at(output_index).get()->handle(); + + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayer>(); + fn->configure(input, output, padding_list, pixel_value); + + _execution_builder->append(asAclFunction(std::move(fn))); +} } // namespace acl_cl } // namespace backend diff --git a/runtimes/neurun/backend/acl_cl/StageGenerator.cc b/runtimes/neurun/backend/acl_cl/StageGenerator.cc index eb245a9..e9da59d 100644 --- a/runtimes/neurun/backend/acl_cl/StageGenerator.cc +++ b/runtimes/neurun/backend/acl_cl/StageGenerator.cc @@ -46,102 +46,6 @@ namespace acl_cl using ::neurun::backend::acl_common::asAclFunction; // -// ActivationBuilder -// -class ActivationBuilder -{ -public: - ActivationBuilder(IExecutionBuilder &builder) : _builder(builder) - { - // DO NOTHING - } - -private: - void appendReLU(::arm_compute::ICLTensor *tensor); - void 
appendReLU1(::arm_compute::ICLTensor *tensor); - void appendReLU6(::arm_compute::ICLTensor *tensor); - -public: - void append(model::Activation code, ::arm_compute::ICLTensor *tensor); - -private: - IExecutionBuilder &_builder; -}; - -void ActivationBuilder::appendReLU(::arm_compute::ICLTensor *ifm_alloc) -{ - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_alloc, nullptr, act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _builder.append(std::move(acl_fn)); -} - -void ActivationBuilder::appendReLU1(::arm_compute::ICLTensor *ifm_alloc) -{ - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_alloc, nullptr, act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _builder.append(std::move(acl_fn)); -} - -void ActivationBuilder::appendReLU6(::arm_compute::ICLTensor *ifm_alloc) -{ - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_alloc, nullptr, act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _builder.append(std::move(acl_fn)); -} - -void ActivationBuilder::append(model::Activation code, ::arm_compute::ICLTensor *ifm_alloc) -{ - switch (code) - { - case model::Activation::NONE: - { - // DO NOTHING - break; - } - case model::Activation::RELU: - { - appendReLU(ifm_alloc); - break; - } - case model::Activation::RELU1: - { - appendReLU1(ifm_alloc); - break; - } - case model::Activation::RELU6: - { - appendReLU6(ifm_alloc); - break; - } - default: - { - throw std::runtime_error("Not supported, yet"); - } - } -} - -// // StageGenerator // StageGenerator::StageGenerator(const neurun::model::Operands &ctx, @@ -151,3182 +55,115 @@ StageGenerator::StageGenerator(const neurun::model::Operands &ctx, // DO NOTHING } -void StageGenerator::visit(const model::operation::CastNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::CastNode::Input::INPUT)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::Conv2DNode &node) -{ - using model::operation::Conv2DNode; - - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)}; - const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)}; - const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)}; - - const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. - const auto &ker_shape = _ctx.at(ker_index).shape(); - const auto ker_height = ker_shape.dim(1); - const auto ker_width = ker_shape.dim(2); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - model::OperandIndex ker_index; - model::OperandIndex bias_index; - - model::ExplicitPadding padding; - model::Stride stride; - model::Activation activation; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.ker_index = ker_index; - param.bias_index = bias_index; - - param.stride = node.param().stride; - param.padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, - param.stride, ker_width, ker_height); - param.activation = node.param().activation; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - auto ker_alloc = tensors->at(param.ker_index).get(); - auto bias_alloc = tensors->at(param.bias_index).get(); - - const auto conv_info = acl_common::asPadStrideInfo(param.padding, param.stride); - const auto act_info = acl_common::asActivationLayerInfo(param.activation); - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLConvolutionLayer>(); - - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), - ofm_alloc->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); - - builder.append(asAclFunction(std::move(fn))); - }); -} - -void StageGenerator::visit(const model::operation::DepthwiseConv2DNode &node) -{ - using model::operation::DepthwiseConv2DNode; - - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(DepthwiseConv2DNode::Input::INPUT)}; - const auto ker_index{node.getInputs().at(DepthwiseConv2DNode::Input::KERNEL)}; - const auto bias_index{node.getInputs().at(DepthwiseConv2DNode::Input::BIAS)}; - - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
- const auto &ker_shape = _ctx.at(ker_index).shape(); - const auto ker_height = ker_shape.dim(1); - const auto ker_width = ker_shape.dim(2); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - model::OperandIndex ker_index; - model::OperandIndex bias_index; - - model::ExplicitPadding padding; - neurun::model::Stride stride; - int multiplier; - - model::Activation activation; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.ker_index = ker_index; - param.bias_index = bias_index; - - param.stride = node.param().stride; - param.padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, - param.stride, ker_width, ker_height); - param.multiplier = node.param().multiplier; - param.activation = node.param().activation; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - auto ker_alloc = tensors->at(param.ker_index).get(); - auto bias_alloc = tensors->at(param.bias_index).get(); - - const auto conv_info = acl_common::asPadStrideInfo(param.padding, param.stride); - // TODO Use `param.activation` instead of `model::Activation::NONE`. See below. - const auto act_info = acl_common::asActivationLayerInfo(model::Activation::NONE); - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); - - fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), - ofm_alloc->handle(), conv_info, param.multiplier, act_info); - - builder.append(asAclFunction(std::move(fn))); - - // TODO Use fused activation instead of separate layer after switching to ACL version >= v19.05. - // Prior versions had a bug due to which the fused activation did not apply in some cases. 
- ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::MaxPool2DNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - - uint32_t kw; - uint32_t kh; - - model::ExplicitPadding padding; - model::Stride stride; - model::Activation activation; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - param.kh = node.param().kh; - param.kw = node.param().kw; - param.stride = node.param().stride; - param.padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, - param.stride, param.kw, param.kh); - param.activation = node.param().activation; - - VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(MaxPool2D) << "KER_H: " << param.kh << std::endl; - VERBOSE(MaxPool2D) << "KER_W: " << param.kw << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_H: " << param.stride.vertical << std::endl; - VERBOSE(MaxPool2D) << "STRIDE_W: " << param.stride.horizontal << std::endl; - VERBOSE(MaxPool2D) << "PAD(T): " << param.padding.top << std::endl; - VERBOSE(MaxPool2D) << "PAD(B): " << param.padding.bottom << std::endl; - VERBOSE(MaxPool2D) << "PAD(L): " << param.padding.left << std::endl; - VERBOSE(MaxPool2D) << "PAD(R): " << param.padding.right << std::endl; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX, - ::arm_compute::Size2D{param.kw, param.kh}, - acl_common::asPadStrideInfo(param.padding, param.stride)}; - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append((std::move(acl_fn))); - - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::AvgPool2DNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - - uint32_t kw; - uint32_t kh; - - model::ExplicitPadding padding; - model::Stride stride; - model::Activation activation; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - param.kh = node.param().kh; - param.kw = node.param().kw; - param.stride = node.param().stride; - param.padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, - param.stride, param.kw, param.kh); - param.activation = node.param().activation; - - VERBOSE(AvgPool2D) << 
"IFM_H: " << ifm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl; - VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl; - VERBOSE(AvgPool2D) << "KER_H: " << param.kh << std::endl; - VERBOSE(AvgPool2D) << "KER_W: " << param.kw << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_H: " << param.stride.vertical << std::endl; - VERBOSE(AvgPool2D) << "STRIDE_W: " << param.stride.horizontal << std::endl; - VERBOSE(AvgPool2D) << "PAD(T): " << param.padding.top << std::endl; - VERBOSE(AvgPool2D) << "PAD(B): " << param.padding.bottom << std::endl; - VERBOSE(AvgPool2D) << "PAD(L): " << param.padding.left << std::endl; - VERBOSE(AvgPool2D) << "PAD(R): " << param.padding.right << std::endl; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{param.kw, param.kh}, - acl_common::asPadStrideInfo(param.padding, param.stride), true /* exclude_padding */}; - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append((std::move(acl_fn))); - - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::ConcatNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto axis_index{node.param().axis_index}; - - struct Param - { - model::OperandIndex output_index; - std::vector input_indexes; - - int32_t axis; - }; - - Param param; - - param.output_index = ofm_index; - for (const auto &e : node.getInputs()) - param.input_indexes.emplace_back(e); - param.axis = _ctx.at(axis_index).asScalar(); - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - // If tensor allocator allocate as subtensor - bool canEliminate = true; - for (auto ifm_ind : param.input_indexes) - { - if (!tensors->isSubTensorOf(param.output_index, ifm_ind)) - { - canEliminate = false; - break; - } - } - if (canEliminate) - { - // If concat eliminated, return a NOP IFunction - builder.append(nnfw::cpp14::make_unique()); - return; - } - - auto output_alloc = tensors->at(param.output_index).get(); - - std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> input_allocs; - for (auto ifm_ind : param.input_indexes) - { - input_allocs.emplace_back(tensors->at(ifm_ind).get()); - } - - auto fn = nnfw::cpp14::make_unique<::neurun::backend::acl_cl::kernel::ConcatLayer>(); - - fn->configure(input_allocs, param.axis, output_alloc); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::FullyConnectedNode &node) -{ - using model::operation::FullyConnectedNode; - - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)}; - const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)}; - const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)}; - - auto tensors = _tensor_builder; - - const auto input_rank = _ctx.at(input_index).shape().rank(); - // TODO Currently we are not handling where the case is that the input's rank is 3. 
- // The handling should be added in the future. - assert(input_rank != 3); - - const auto output_size = _ctx.at(output_index).shape().dim(1); - UNUSED_RELEASE(output_size); - assert(_ctx.at(bias_index).shape().dim(0) == output_size); - assert(_ctx.at(weight_index).shape().dim(0) == output_size); - const auto batch_size = _ctx.at(output_index).shape().dim(0); - const auto input_size = _ctx.at(weight_index).shape().dim(1); - - // Check for reshaping input's shape into rank-2 - bool needs_reshape = false; - neurun::model::Shape reshape(2); - if (input_rank == 4) - { - // TODO Support NCHW frontend - model::FeatureShape ifm_shape_feature = _ctx.at(input_index).shape().asFeature(); - auto feature_size = - ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W; - - UNUSED_RELEASE(feature_size); - assert(feature_size == batch_size * input_size); - - // for reshaping - needs_reshape = true; - reshape.dim(0) = batch_size; /* H */ - reshape.dim(1) = input_size; /* W */ - } - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - - model::OperandIndex input_index; - model::OperandIndex weight_index; - model::OperandIndex bias_index; - - model::Activation activation; - - bool needs_reshape; - neurun::model::Shape reshape; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - param.weight_index = weight_index; - param.bias_index = bias_index; - - param.activation = node.param().activation; - - param.needs_reshape = needs_reshape; - param.reshape = reshape; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - auto weight_alloc = tensors->at(param.weight_index).get(); - auto bias_alloc = tensors->at(param.bias_index).get(); - auto acl_layout = output_alloc->handle()->info()->data_layout(); - - auto fn = nnfw::cpp14::make_unique(); - - fn->configure(input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), - output_alloc->handle(), param.needs_reshape, - ::neurun::backend::acl_common::asTensorShape( - param.reshape, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout))); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - - ActivationBuilder{builder}.append(param.activation, output_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::MulNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(model::operation::MulNode::Input::LHS)}; - const auto rhs_index{node.getInputs().at(model::operation::MulNode::Input::RHS)}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex lhs_index; - model::OperandIndex rhs_index; - - model::Activation activation; - }; - - Param param; - - param.ofm_index = output_index; - param.lhs_index = lhs_index; - param.rhs_index = rhs_index; - - param.activation = node.param().activation; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto lhs_alloc = tensors->at(param.lhs_index).get(); - auto rhs_alloc = tensors->at(param.rhs_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>(); - - l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale - 
arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::ReduceSumNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::ReduceSumNode::Input::INPUT)}; - const auto axis_index{node.param().axis_index}; - - std::vector axes; - const auto axis_base = _ctx.at(axis_index).data().base(); - const auto axis_size = _ctx.at(axis_index).shape().num_elements(); - const auto input_rank = _ctx.at(input_index).shape().rank(); - - // The axis's data must exist as constant values - assert(axis_base != nullptr); - for (size_t n = 0; n < axis_size; ++n) - { - int32_t axis_value = *(reinterpret_cast(axis_base) + n); - if (axis_value < 0) - { - axis_value += input_rank; - } - axes.emplace_back( - ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, axis_value).value()); - } - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - - std::vector axes; - uint32_t input_rank; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - param.axes = std::move(axes); - param.input_rank = input_rank; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](compiler::IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - std::set axes; - // TODO Support NCHW frontend - // TODO Change the layout of frontend and backend to be the same - auto acl_layout = input_alloc->handle()->info()->data_layout(); - // CWHN -> WHCN - uint32_t permutation[4] = {2, 0, 1, 3}; - for (size_t i = 0; i < param.axes.size(); ++i) - { - if (acl_layout == ::arm_compute::DataLayout::NCHW && param.input_rank == 4) - { - axes.insert(permutation[param.axes[i]]); - } - else - { - axes.insert(param.axes[i]); - } - } - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); - - fn->configure(input_alloc->handle(), output_alloc->handle(), axes, - ::arm_compute::ReduceOperation::SUM); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ReshapeNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - // NOTE This operation must not be changed the layout from frontend to backend - // However, this runtime can be change the layout of this operation from NHWC to NCHW now - // TODO Change the layout of frontend and backend to be the same and layer to CLReshapeLayer - auto fn = nnfw::cpp14::make_unique<::arm_compute::misc::GenericReshapeLayer>(); - - fn->configure(input_alloc->handle(), output_alloc->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} 
- -void StageGenerator::visit(const model::operation::SqueezeNode &node) -{ - // Squeeze is identical to reshape except that it has an optional dimensions input. - // In addition, optional dims_index is ignored since output tensor already has squeezed shape - // by freezer and toco - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::SqueezeNode::Input::INPUT)}; - const auto dims_index{node.param().dims}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param{output_index, input_index}; - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - auto fn = nnfw::cpp14::make_unique(); - fn->configure(input_alloc->handle(), output_alloc->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::TanhNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::TanhNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - auto fn = nnfw::cpp14::make_unique(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; - - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::SoftmaxNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - float beta; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - param.beta = node.param().beta; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>(); - - fn->configure(input_alloc->handle(), output_alloc->handle(), param.beta); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::StridedSliceNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::StridedSliceNode::Input::INPUT)}; - const auto startData_index{node.param().startData_index}; - const auto endData_index{node.param().endData_index}; - const auto stridesData_index{node.param().stridesData_index}; - const auto beginMask_index{node.param().beginMask_index}; - const auto endMask_index{node.param().endMask_index}; - const auto shrinkAxisMask_index{node.param().shrinkAxisMask_index}; - - // Set initializers for indices data 
such as order of inputData - int input_rank = _ctx.at(input_index).shape().rank(); - std::vector starts; - std::vector ends; - std::vector strides; - starts.resize(input_rank, 0); - ends.resize(input_rank, 0); - strides.resize(input_rank, 0); - { - auto input_shape = _ctx.at(input_index).shape(); - auto startData_base = _ctx.at(startData_index).data().base(); - auto endData_base = _ctx.at(endData_index).data().base(); - auto stridesData_base = _ctx.at(stridesData_index).data().base(); - const int startData_size = _ctx.at(startData_index).shape().num_elements(); - const int endData_size = _ctx.at(endData_index).shape().num_elements(); - const int stridesData_size = _ctx.at(stridesData_index).shape().num_elements(); - - using neurun::model::DataType; - - UNUSED_RELEASE(startData_size); - UNUSED_RELEASE(endData_size); - UNUSED_RELEASE(stridesData_size); - - assert(_ctx.at(startData_index).typeInfo().type() == DataType::INT32); - assert(_ctx.at(endData_index).typeInfo().type() == DataType::INT32); - assert(_ctx.at(stridesData_index).typeInfo().type() == DataType::INT32); - assert(startData_size == input_rank); - assert(endData_size == input_rank); - assert(stridesData_size == input_rank); - - assert(startData_base != nullptr); - for (int n = 0; n < input_rank; ++n) - { - auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n).value(); - - int32_t start_value = *(reinterpret_cast(startData_base) + n); - starts[axis] = start_value; - - int32_t end_value = *(reinterpret_cast(endData_base) + n); - ends[axis] = end_value; +void StageGenerator::visit(const model::operation::CastNode &) {} - int32_t strides_value = *(reinterpret_cast(stridesData_base) + n); - strides[axis] = strides_value; - } - } +void StageGenerator::visit(const model::operation::Conv2DNode &) {} - struct Param - { - model::OperandIndex outputData_index; - model::OperandIndex inputData_index; +void StageGenerator::visit(const model::operation::DepthwiseConv2DNode &) {} - std::vector starts; - std::vector ends; - std::vector strides; +void StageGenerator::visit(const model::operation::MaxPool2DNode &) {} - int32_t beginMask; - int32_t endMask; - int32_t shrinkAxisMask; - }; +void StageGenerator::visit(const model::operation::AvgPool2DNode &) {} - Param param; - param.outputData_index = output_index; - param.inputData_index = input_index; +void StageGenerator::visit(const model::operation::ConcatNode &) {} - param.starts = starts; - param.ends = ends; - param.strides = strides; +void StageGenerator::visit(const model::operation::FullyConnectedNode &) {} - // Set mask bits such as order of inputData - param.beginMask = ::neurun::backend::acl_common::ReorderBits( - _ctx.at(beginMask_index).asScalar(), input_rank); - param.endMask = ::neurun::backend::acl_common::ReorderBits( - _ctx.at(endMask_index).asScalar(), input_rank); - param.shrinkAxisMask = ::neurun::backend::acl_common::ReorderBits( - _ctx.at(shrinkAxisMask_index).asScalar(), input_rank); +void StageGenerator::visit(const model::operation::MulNode &) {} - auto tensors = _tensor_builder; +void StageGenerator::visit(const model::operation::ReduceSumNode &) {} - returnStage([tensors, param](IExecutionBuilder &builder) { - auto outputData_alloc = tensors->at(param.outputData_index).get(); - auto inputData_alloc = tensors->at(param.inputData_index).get(); +void StageGenerator::visit(const model::operation::ReshapeNode &) {} - ::arm_compute::Coordinates starts; - ::arm_compute::Coordinates ends; - ::arm_compute::BiStrides strides; +void StageGenerator::visit(const 
model::operation::SqueezeNode &) {} - for (size_t i = 0; i < param.starts.size(); ++i) - { - starts.set(i, param.starts[i]); - ends.set(i, param.ends[i]); - strides.set(i, param.strides[i]); - } +void StageGenerator::visit(const model::operation::TanhNode &) {} - std::unique_ptr<::arm_compute::IFunction> fn; +void StageGenerator::visit(const model::operation::SoftmaxNode &) {} - auto l = nnfw::cpp14::make_unique<::arm_compute::CLStridedSlice>(); +void StageGenerator::visit(const model::operation::StridedSliceNode &) {} - l->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts, ends, strides, - param.beginMask, param.endMask, param.shrinkAxisMask); +void StageGenerator::visit(const model::operation::TransposeNode &) {} - fn = std::move(l); +void StageGenerator::visit(const model::operation::AddNode &) {} - auto acl_fn = asAclFunction(std::move(fn)); +void StageGenerator::visit(const model::operation::SubNode &) {} - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::TransposeNode &node) -{ - const auto ofm_idx{node.getOutputs().at(0)}; - const auto ifm_idx{node.getInputs().at(model::operation::TransposeNode::Input::INPUT)}; - const auto perm{node.param().perm}; - - const auto rank = _ctx.at(ifm_idx).shape().rank(); - std::vector pv; - const auto perm_base = _ctx.at(perm).data().base(); - const int perm_size = _ctx.at(perm).shape().num_elements(); - - assert(perm_base != nullptr); - for (int32_t n = 0; n < perm_size; ++n) - { - int32_t perm_value = *(reinterpret_cast(perm_base) + n); - assert(static_cast(perm_value) < rank); - pv.emplace_back(perm_value); - } - - struct Param - { - model::OperandIndex ifm_idx; - model::OperandIndex ofm_idx; - - std::vector pv; - uint32_t rank; - }; - - Param param; +void StageGenerator::visit(const model::operation::DivNode &) {} - param.ifm_idx = ifm_idx; - param.ofm_idx = ofm_idx; - param.pv = pv; - param.rank = rank; +void StageGenerator::visit(const model::operation::ExpNode &) {} - auto tensors = _tensor_builder; +void StageGenerator::visit(const model::operation::LogisticNode &) {} - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_idx).get(); - auto ifm_alloc = tensors->at(param.ifm_idx).get(); - // TODO Support NCHW frontend - // TODO Change the layout of frontend and backend to be the same - auto acl_layout = ifm_alloc->handle()->info()->data_layout(); - // Reversed - auto pv = ::neurun::backend::acl_common::getARMComputePermutationVector(param.rank, param.pv); - if (acl_layout == ::arm_compute::DataLayout::NCHW && param.rank == 4) - { - // CWHN -> WHCN - // C : 0 -> 2, W : 1 -> 0, H : 2 -> 1, N : 3 -> 3 - ::arm_compute::PermutationVector cwhn_to_whcn_pv; - uint32_t axis[4] = {2, 0, 1, 3}; - for (size_t i = 0; i < param.pv.size(); ++i) - { - cwhn_to_whcn_pv.set(axis[i], axis[pv[i]]); - } - pv = cwhn_to_whcn_pv; - } +void StageGenerator::visit(const model::operation::LogicalAndNode &) {} - std::unique_ptr<::arm_compute::IFunction> fn; +void StageGenerator::visit(const model::operation::LSTMNode &) {} - auto l = nnfw::cpp14::make_unique<::arm_compute::CLPermute>(); +void StageGenerator::visit(const model::operation::ReduceMaxNode &) {} - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv); +void StageGenerator::visit(const model::operation::ComparisonNode &) {} - fn = std::move(l); +void StageGenerator::visit(const model::operation::RSQRTNode &) {} - auto acl_fn = asAclFunction(std::move(fn)); +void StageGenerator::visit(const 
model::operation::ReLUNode &) {} - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::AddNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)}; - const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex lhs_index; - model::OperandIndex rhs_index; +void StageGenerator::visit(const model::operation::ResizeBilinearNode &) {} - model::Activation activation; - }; +void StageGenerator::visit(const model::operation::ReLU1Node &) {} - Param param; +void StageGenerator::visit(const model::operation::ReLU6Node &) {} - param.ofm_index = output_index; - param.lhs_index = lhs_index; - param.rhs_index = rhs_index; +void StageGenerator::visit(const model::operation::RNNNode &) {} - param.activation = node.param().activation; +void StageGenerator::visit(const model::operation::FloorNode &) {} - auto tensors = _tensor_builder; +void StageGenerator::visit(const model::operation::SpaceToDepthNode &) {} - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto lhs_alloc = tensors->at(param.lhs_index).get(); - auto rhs_alloc = tensors->at(param.rhs_index).get(); +void StageGenerator::visit(const model::operation::L2Pool2DNode &) {} - std::unique_ptr<::arm_compute::IFunction> fn; +void StageGenerator::visit(const model::operation::EmbeddingLookupNode &) {} - auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>(); +void StageGenerator::visit(const model::operation::L2NormalizationNode &) {} - l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), - arm_compute::ConvertPolicy::SATURATE); +void StageGenerator::visit(const model::operation::HashtableLookupNode &) {} - fn = std::move(l); +void StageGenerator::visit(const model::operation::PReLUNode &) {} - auto acl_fn = asAclFunction(std::move(fn)); +void StageGenerator::visit(const model::operation::TransposeConvNode &) {} - builder.append(std::move(acl_fn)); +void StageGenerator::visit(const model::operation::SQRTNode &) {} - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::SubNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(model::operation::SubNode::Input::LHS)}; - const auto rhs_index{node.getInputs().at(model::operation::SubNode::Input::RHS)}; +void StageGenerator::visit(const model::operation::LogicalOrNode &) {} - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex lhs_index; - model::OperandIndex rhs_index; +void StageGenerator::visit(const model::operation::LogicalNotNode &) {} - model::Activation activation; - }; +void StageGenerator::visit(const model::operation::SquaredDifferenceNode &) {} - Param param; +void StageGenerator::visit(const model::operation::TopKV2Node &) {} - param.ofm_index = output_index; - param.lhs_index = lhs_index; - param.rhs_index = rhs_index; +void StageGenerator::visit(const model::operation::GatherNode &) {} - param.activation = node.param().activation; +void StageGenerator::visit(const model::operation::NegNode &) {} - auto tensors = _tensor_builder; +void StageGenerator::visit(const model::operation::AbsNode &) {} - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = 
tensors->at(param.ofm_index).get(); - auto lhs_alloc = tensors->at(param.lhs_index).get(); - auto rhs_alloc = tensors->at(param.rhs_index).get(); +void StageGenerator::visit(const model::operation::ArgMaxNode &) {} - std::unique_ptr<::arm_compute::IFunction> fn; +void StageGenerator::visit(const model::operation::DequantizeNode &) {} - auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtraction>(); +void StageGenerator::visit(const model::operation::MeanNode &) {} - l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), - arm_compute::ConvertPolicy::SATURATE); +void StageGenerator::visit(const model::operation::LocalResponseNormalizationNode &) {} - fn = std::move(l); +void StageGenerator::visit(const model::operation::DepthToSpaceNode &) {} - auto acl_fn = asAclFunction(std::move(fn)); +void StageGenerator::visit(const model::operation::ReduceMinNode &) {} - builder.append(std::move(acl_fn)); +void StageGenerator::visit(const model::operation::SplitNode &) {} - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} +void StageGenerator::visit(const model::operation::UnpackNode &) {} -void StageGenerator::visit(const model::operation::DivNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(model::operation::DivNode::Input::LHS)}; - const auto rhs_index{node.getInputs().at(model::operation::DivNode::Input::RHS)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex lhs_index; - model::OperandIndex rhs_index; - - model::Activation activation; - }; - - Param param; - - param.ofm_index = output_index; - param.lhs_index = lhs_index; - param.rhs_index = rhs_index; - - param.activation = node.param().activation; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto lhs_alloc = tensors->at(param.lhs_index).get(); - auto rhs_alloc = tensors->at(param.rhs_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticDivision>(); - - l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::ExpNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::ExpNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLExpLayer>(); - - l->configure(input_alloc->handle(), output_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::LogisticNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto 
input_index{node.getInputs().at(model::operation::LogisticNode::Input::INPUT)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - - Param param; - - param.ofm_index = output_index; - param.ifm_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::LogicalAndNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT1)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input0_index; - model::OperandIndex input1_index; - }; - - Param param; - - param.output_index = output_index; - param.input0_index = input0_index; - param.input1_index = input1_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input0_alloc = tensors->at(param.input0_index).get(); - auto input1_alloc = tensors->at(param.input1_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); - - l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), - ::arm_compute::BinaryLogicalOperation::AND); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::LSTMNode &node) -{ - // TODO Support dynamic rnn - // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
- const auto scratch_buffer_index{ - node.getOutputs().at(model::operation::LSTMNode::Output::SCRATCH_BUFFER)}; - const auto output_state_out_index{ - node.getOutputs().at(model::operation::LSTMNode::Output::OUTPUT_STATE_OUT)}; - const auto cell_state_out_index{ - node.getOutputs().at(model::operation::LSTMNode::Output::CELL_STATE_OUT)}; - const auto output_index{node.getOutputs().at(model::operation::LSTMNode::Output::OUTPUT)}; - - const auto input_index{node.getInputs().at(model::operation::LSTMNode::Input::INPUT)}; - const auto input_to_input_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional - const auto input_to_forget_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_FORGET_WEIGHTS)}; - const auto input_to_cell_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_CELL_WEIGHTS)}; - const auto input_to_output_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::INPUT_TO_OUTPUT_WEIGHTS)}; - const auto recurrent_to_input_weights_index{node.getInputs().at( - model::operation::LSTMNode::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional - const auto recurrent_to_forget_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::RECURRENT_TO_FORGET_WEIGHTS)}; - const auto recurrent_to_cell_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::RECURRENT_TO_CELL_WEIGHTS)}; - const auto recurrent_to_output_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::RECURRENT_TO_OUTPUT_WEIGHTS)}; - const auto cell_to_input_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::CELL_TO_INPUT_WEIGHTS)}; // optional - const auto cell_to_forget_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::CELL_TO_FORGET_WEIGHTS)}; // optional - const auto cell_to_output_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional - const auto input_gate_bias_index{ - node.getInputs().at(model::operation::LSTMNode::Input::INPUT_GATE_BIAS)}; - const auto forget_gate_bias_index{ - node.getInputs().at(model::operation::LSTMNode::Input::FORGET_GATE_BIAS)}; - const auto cell_bias_index{node.getInputs().at(model::operation::LSTMNode::Input::CELL_BIAS)}; - const auto output_gate_bias_index{ - node.getInputs().at(model::operation::LSTMNode::Input::OUTPUT_GATE_BIAS)}; - const auto projection_weights_index{ - node.getInputs().at(model::operation::LSTMNode::Input::PROJECTION_WEIGHTS)}; // optional - const auto projection_bias_index{ - node.getInputs().at(model::operation::LSTMNode::Input::PROJECTION_BIAS)}; // optional - const auto output_state_in_index{ - node.getInputs().at(model::operation::LSTMNode::Input::OUTPUT_STATE_IN)}; - const auto cell_state_in_index{ - node.getInputs().at(model::operation::LSTMNode::Input::CELL_STATE_IN)}; - const auto cell_threshold = node.param().cell_threshold; - const auto projection_threshold = node.param().projection_threshold; - - bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(input_to_input_weights_index).shape().dim(1) != 0; - bool has_recurrent_to_input_weights = - _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 && - _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0; - bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0; - bool has_cell_to_output_weights = 
_ctx.at(cell_to_output_weights_index).shape().dim(0) != 0; - bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 && - _ctx.at(projection_weights_index).shape().dim(1) != 0; - bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0); - - // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG. - // true: no CIFG - // false: CIFG - // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG). - bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights; - - // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole. - // But the cell_to_input_weights does not exist in regular CIFG although peephole. - // true: peephole - // false: no peephole - bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights; - - // NOTE Although the projection weights has data the projection bias may not have data. - bool has_projection_param = has_projection_weights; - - struct Param - { - model::OperandIndex scratch_buffer_index; - model::OperandIndex output_state_out_index; - model::OperandIndex cell_state_out_index; - model::OperandIndex output_index; - - model::OperandIndex input_index; - model::OperandIndex input_to_forget_weights_index; - model::OperandIndex input_to_cell_weights_index; - model::OperandIndex input_to_output_weights_index; - model::OperandIndex recurrent_to_forget_weights_index; - model::OperandIndex recurrent_to_cell_weights_index; - model::OperandIndex recurrent_to_output_weights_index; - model::OperandIndex forget_gate_bias_index; - model::OperandIndex cell_bias_index; - model::OperandIndex output_gate_bias_index; - model::OperandIndex output_state_in_index; - model::OperandIndex cell_state_in_index; - model::Activation activation; - float cell_clip; - float projection_clip; - - // CIFG params - model::OperandIndex input_to_input_weights_index; - model::OperandIndex recurrent_to_input_weights_index; - model::OperandIndex cell_to_input_weights_index; - model::OperandIndex input_gate_bias_index; - - // peephole params - model::OperandIndex cell_to_forget_weights_index; - model::OperandIndex cell_to_output_weights_index; - - // projection params - model::OperandIndex projection_weights_index; - model::OperandIndex projection_bias_index; - - // LSTM options - bool has_cifg_param; - bool has_peephole_param; - bool has_projection_param; - bool has_projection_bias; - }; - - Param param; - - param.scratch_buffer_index = scratch_buffer_index; - param.output_state_out_index = output_state_out_index; - param.cell_state_out_index = cell_state_out_index; - param.output_index = output_index; - - param.input_index = input_index; - param.input_to_input_weights_index = input_to_input_weights_index; - param.input_to_forget_weights_index = input_to_forget_weights_index; - param.input_to_cell_weights_index = input_to_cell_weights_index; - param.input_to_output_weights_index = input_to_output_weights_index; - param.recurrent_to_input_weights_index = recurrent_to_input_weights_index; - param.recurrent_to_forget_weights_index = recurrent_to_forget_weights_index; - param.recurrent_to_cell_weights_index = recurrent_to_cell_weights_index; - param.recurrent_to_output_weights_index = recurrent_to_output_weights_index; - param.cell_to_input_weights_index = cell_to_input_weights_index; - param.cell_to_forget_weights_index = cell_to_forget_weights_index; - param.cell_to_output_weights_index = cell_to_output_weights_index; - 
param.input_gate_bias_index = input_gate_bias_index; - param.forget_gate_bias_index = forget_gate_bias_index; - param.cell_bias_index = cell_bias_index; - param.output_gate_bias_index = output_gate_bias_index; - param.projection_weights_index = projection_weights_index; - param.projection_bias_index = projection_bias_index; - param.output_state_in_index = output_state_in_index; - param.cell_state_in_index = cell_state_in_index; - param.activation = node.param().activation; - param.cell_clip = cell_threshold; - param.projection_clip = projection_threshold; - assert(param.cell_clip >= 0.f && param.projection_clip >= 0.f); - - param.has_cifg_param = has_cifg_param; - param.has_peephole_param = has_peephole_param; - param.has_projection_param = has_projection_param; - param.has_projection_bias = has_projection_bias; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto scratch_buffer_alloc = tensors->at(param.scratch_buffer_index).get(); - auto output_state_out_alloc = tensors->at(param.output_state_out_index).get(); - auto cell_state_out_alloc = tensors->at(param.cell_state_out_index).get(); - auto output_alloc = tensors->at(param.output_index).get(); - - auto input_alloc = tensors->at(param.input_index).get(); - ; - auto input_to_forget_weights_alloc = tensors->at(param.input_to_forget_weights_index).get(); - auto input_to_cell_weights_alloc = tensors->at(param.input_to_cell_weights_index).get(); - auto input_to_output_weights_alloc = tensors->at(param.input_to_output_weights_index).get(); - auto recurrent_to_forget_weights_alloc = - tensors->at(param.recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_alloc = tensors->at(param.recurrent_to_cell_weights_index).get(); - auto recurrent_to_output_weights_alloc = - tensors->at(param.recurrent_to_output_weights_index).get(); - - auto forget_gate_bias_alloc = tensors->at(param.forget_gate_bias_index).get(); - auto cell_bias_alloc = tensors->at(param.cell_bias_index).get(); - auto output_gate_bias_alloc = tensors->at(param.output_gate_bias_index).get(); - auto output_state_in_alloc = tensors->at(param.output_state_in_index).get(); - auto cell_state_in_alloc = tensors->at(param.cell_state_in_index).get(); - - auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(param.activation); - ; - auto cell_clip = param.cell_clip; - auto proj_clip = param.projection_clip; - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLLSTMLayer>(); - - ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{}; - if (param.has_cifg_param) - { - auto input_to_input_weights_alloc = - tensors->at(param.input_to_input_weights_index).get(); // optional - auto recurrent_to_input_weights_alloc = - tensors->at(param.recurrent_to_input_weights_index).get(); // optional - auto cell_to_input_weights_handle = - param.has_peephole_param ? 
tensors->at(param.cell_to_input_weights_index).get()->handle() - : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_alloc = tensors->at(param.input_gate_bias_index).get(); // optional - lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(), - recurrent_to_input_weights_alloc->handle(), - cell_to_input_weights_handle, input_gate_bias_alloc->handle()); - } - if (param.has_peephole_param) - { - auto cell_to_forget_weights_alloc = - tensors->at(param.cell_to_forget_weights_index).get(); // optional - auto cell_to_output_weights_alloc = - tensors->at(param.cell_to_output_weights_index).get(); // optional - lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(), - cell_to_output_weights_alloc->handle()); - } - if (param.has_projection_param) - { - auto projection_weights_alloc = tensors->at(param.projection_weights_index).get(); // optional - auto projection_bias_handle = param.has_projection_bias - ? tensors->at(param.projection_bias_index).get()->handle() - : nullptr; // optional - lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle); - } - - l->configure(input_alloc->handle(), input_to_forget_weights_alloc->handle(), - input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(), - recurrent_to_forget_weights_alloc->handle(), - recurrent_to_cell_weights_alloc->handle(), - recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(), - cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), - output_state_in_alloc->handle(), cell_state_in_alloc->handle(), - scratch_buffer_alloc->handle(), output_state_out_alloc->handle(), - cell_state_out_alloc->handle(), output_alloc->handle(), lstm_params, act_info, - cell_clip, proj_clip); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ReduceMaxNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::ReduceMaxNode::Input::INPUT)}; - const auto axis_index{node.param().axis_index}; - - auto input_shape = _ctx.at(input_index).shape(); - auto axis_shape = _ctx.at(axis_index).shape(); - - std::vector axis; - { - const auto ifm_rank = input_shape.rank(); - switch (axis_shape.rank()) - { - case 0: // scalar - { - int32_t axis_value = _ctx.at(axis_index).asScalar(); - if (axis_value < 0) - { - axis_value += ifm_rank; - } - axis.emplace_back( - ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); - break; - } - case 1: // vector - { - const auto axis_base = _ctx.at(axis_index).data().base(); - const int axis_size = axis_shape.num_elements(); - - // If axis's data does not exist as constant values and can be gotten as input data, we have - // to find a way to infer output shape when sinking output. 
- assert(axis_base != nullptr); - for (int32_t n = 0; n < axis_size; ++n) - { - int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); - if (axis_value < 0) - { - axis_value += ifm_rank; - } - axis.emplace_back( - ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value()); - } - break; - } - default: - throw std::runtime_error("Not supported"); - break; - } - } - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - - std::vector<uint32_t> axis_index; - uint32_t input_rank; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - param.axis_index = axis; - param.input_rank = input_shape.rank(); - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.output_index).get(); - auto ifm_alloc = tensors->at(param.input_index).get(); - std::set<uint32_t> axes; - // TODO Support NCHW frontend - // TODO Change the layout of frontend and backend to be the same - auto acl_layout = ifm_alloc->handle()->info()->data_layout(); - // CWHN -> WHCN - uint32_t permutation[4] = {2, 0, 1, 3}; - for (size_t i = 0; i < param.axis_index.size(); ++i) - { - if (acl_layout == ::arm_compute::DataLayout::NCHW && param.input_rank == 4) - { - axes.insert(permutation[param.axis_index[i]]); - } - else - { - axes.insert(param.axis_index[i]); - } - } - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), axes, arm_compute::ReduceOperation::MAX); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ComparisonNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(model::operation::ComparisonNode::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(model::operation::ComparisonNode::Input::INPUT1)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input0_index; - model::OperandIndex input1_index; - - model::operation::ComparisonNode::ComparisonType comparison_type; - }; - - Param param; - - param.output_index = output_index; - param.input0_index = input0_index; - param.input1_index = input1_index; - - param.comparison_type = node.param().comparison_type; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input0_alloc = tensors->at(param.input0_index).get(); - auto input1_alloc = tensors->at(param.input1_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLComparison>(); - - l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(), - (arm_compute::ComparisonOperation)param.comparison_type); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::RSQRTNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::LogisticNode::Input::INPUT)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - -
Param param; - - param.ofm_index = output_index; - param.ifm_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRsqrtLayer>(); - - fn->configure(ifm_alloc->handle(), ofm_alloc->handle()); - - builder.append(asAclFunction(std::move(fn))); - }); -} - -void StageGenerator::visit(const model::operation::ReLUNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::ReLUNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - - fn->configure(input_alloc->handle(), output_alloc->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ResizeBilinearNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - - const auto ifm_index{node.getInputs().at(model::operation::ResizeBilinearNode::Input::INPUT)}; - const auto height_index{node.param().height_index}; - const auto width_index{node.param().width_index}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - - int32_t new_height; - int32_t new_width; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.new_height = _ctx.at(height_index).asScalar<int32_t>(); - param.new_width = _ctx.at(width_index).asScalar<int32_t>(); - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLScale>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), - ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, - ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ReLU1Node &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::ReLU1Node::Input::INPUT)}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - - std::unique_ptr<::arm_compute::IFunction> fn; - -
auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ReLU6Node &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::ReLU6Node::Input::INPUT)}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::RNNNode &node) -{ - const auto output_index{node.getOutputs().at(model::operation::RNNNode::Output::OUTPUT)}; - const auto hidden_state_out_index{ - node.getOutputs().at(model::operation::RNNNode::Output::HIDDEN_STATE_OUT)}; - - const auto input_index{node.getInputs().at(model::operation::RNNNode::Input::INPUT)}; - const auto weights_index{node.getInputs().at(model::operation::RNNNode::Input::WEIGHTS)}; - const auto recurrent_weights_index{ - node.getInputs().at(model::operation::RNNNode::Input::RECURRENT_WEIGHTS)}; - const auto bias_index{node.getInputs().at(model::operation::RNNNode::Input::BIAS)}; - const auto hidden_state_in_index{ - node.getInputs().at(model::operation::RNNNode::Input::HIDDEN_STATE_IN)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex hidden_state_out_index; - - model::OperandIndex input_index; - model::OperandIndex weights_index; - model::OperandIndex recurrent_weights_index; - model::OperandIndex bias_index; - model::OperandIndex hidden_state_in_index; - model::Activation activation; - }; - - Param param; - - param.output_index = output_index; - param.hidden_state_out_index = hidden_state_out_index; - - param.input_index = input_index; - param.weights_index = weights_index; - param.recurrent_weights_index = recurrent_weights_index; - param.bias_index = bias_index; - param.hidden_state_in_index = hidden_state_in_index; - param.activation = node.param().activation; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto hidden_state_out_alloc = tensors->at(param.hidden_state_out_index).get(); - - auto input_alloc = tensors->at(param.input_index).get(); - auto weights_alloc = tensors->at(param.weights_index).get(); - auto recurrent_weights_alloc = tensors->at(param.recurrent_weights_index).get(); - auto bias_alloc = tensors->at(param.bias_index).get(); - auto hidden_state_in_alloc = tensors->at(param.hidden_state_in_index).get(); - auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(param.activation); - - auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::CLCopy>(); - 
copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle()); - builder.append(asAclFunction(std::move(copy_layer))); - - std::unique_ptr<::arm_compute::IFunction> fn; - auto rnn_layer = nnfw::cpp14::make_unique<::arm_compute::CLRNNLayerEx>(); - rnn_layer->configure(input_alloc->handle(), weights_alloc->handle(), - recurrent_weights_alloc->handle(), bias_alloc->handle(), - hidden_state_out_alloc->handle(), output_alloc->handle(), act_info); - fn = std::move(rnn_layer); - builder.append(asAclFunction(std::move(fn))); - }); -} - -void StageGenerator::visit(const model::operation::FloorNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::FloorNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLFloor>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::SpaceToDepthNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::SpaceToDepthNode::Input::INPUT)}; - const auto block_size_index{node.param().block_size_index}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - - int32_t block_size; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.block_size = _ctx.at(block_size_index).asScalar(); - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), param.block_size); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::L2Pool2DNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::L2Pool2DNode::Input::INPUT)}; - - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - - uint32_t kw; - uint32_t kh; - - model::ExplicitPadding padding; - model::Stride stride; - model::Activation activation; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - param.kw = node.param().kw; - param.kh = node.param().kh; - param.stride = node.param().stride; - param.padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, - param.stride, param.kw, param.kh); - param.activation = node.param().activation; - - auto tensors = _tensor_builder; - - 
returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - ::arm_compute::PoolingLayerInfo info{ - ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{param.kw, param.kh}, - ::neurun::backend::acl_common::asPadStrideInfo(param.padding, param.stride)}; - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLPoolingLayer>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), info); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - - ActivationBuilder{builder}.append(param.activation, ofm_alloc->handle()); - }); -} - -void StageGenerator::visit(const model::operation::EmbeddingLookupNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lookups_index{ - node.getInputs().at(model::operation::EmbeddingLookupNode::Input::LOOKUPS)}; - const auto values_index{ - node.getInputs().at(model::operation::EmbeddingLookupNode::Input::VALUES)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex lookups_index; - model::OperandIndex values_index; - }; - - Param param; - - param.output_index = output_index; - param.lookups_index = lookups_index; - param.values_index = values_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto lookups_alloc = tensors->at(param.lookups_index).get(); - auto values_alloc = tensors->at(param.values_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>(); - - l->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::L2NormalizationNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::L2NormalizationNode::Input::INPUT)}; - - // {CL|Neon}L2Normalization performs the reduction only along dimension 0 - // L2 Normalization always performs the reduction along the depth axis - // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by - // choosing normalization parameters as below - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - - int32_t radius; - float alpha; - float beta; - float bias; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - param.radius = 2 * _ctx.at(ifm_index).shape().dim(3) + 1; // normSize = depth * 2 + 1 - param.alpha = 1.0f; // In the implementation to make alpha_ become 1 - param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) - param.bias = 0.0f; // Don't offset the reduction. 
- - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - const auto norm_info = - ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, param.radius, - param.alpha, param.beta, param.bias, false); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::HashtableLookupNode &node) -{ - const auto output_index{ - node.getOutputs().at(model::operation::HashtableLookupNode::Output::OUTPUT)}; - const auto hits_index{node.getOutputs().at(model::operation::HashtableLookupNode::Output::HITS)}; - - const auto lookups_index{ - node.getInputs().at(model::operation::HashtableLookupNode::Input::LOOKUPS)}; - const auto keys_index{node.getInputs().at(model::operation::HashtableLookupNode::Input::KEYS)}; - const auto values_index{ - node.getInputs().at(model::operation::HashtableLookupNode::Input::VALUES)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex hits_index; - - model::OperandIndex lookups_index; - model::OperandIndex keys_index; - model::OperandIndex values_index; - }; - - Param param; - - param.output_index = output_index; - param.hits_index = hits_index; - - param.lookups_index = lookups_index; - param.keys_index = keys_index; - param.values_index = values_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto hits_alloc = tensors->at(param.hits_index).get(); - - auto lookups_alloc = tensors->at(param.lookups_index).get(); - auto keys_alloc = tensors->at(param.keys_index).get(); - auto values_alloc = tensors->at(param.values_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>(); - - l->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(), - output_alloc->handle(), hits_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::PReLUNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::PReLUNode::Input::INPUT)}; - const auto alpha_index{node.getInputs().at(model::operation::PReLUNode::Input::ALPHA)}; - - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - model::OperandIndex alpha_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.alpha_index = alpha_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - auto alpha_alloc = tensors->at(param.alpha_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>(); - - l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle()); - - fn = std::move(l); - - 
auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::TransposeConvNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto output_shape_index{ - node.getInputs().at(model::operation::TransposeConvNode::Input::OUTPUT_SHAPE)}; - const auto ker_index{node.getInputs().at(model::operation::TransposeConvNode::Input::KERNEL)}; - const auto ifm_index{node.getInputs().at(model::operation::TransposeConvNode::Input::INPUT)}; - - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - model::OperandIndex ker_index; - - model::ExplicitPadding padding; - model::Stride stride; - uint32_t invalid_horizontal; - uint32_t invalid_vertical; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.ker_index = ker_index; - - param.stride = node.param().stride; - - assert((node.param().padding.type == model::PaddingType::SAME) || - (node.param().padding.type == model::PaddingType::VALID)); - param.padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, - param.stride, ker_shape.W, ker_shape.H); - if (node.param().padding.type == model::PaddingType::VALID) - { - param.invalid_horizontal = - ofm_shape.W - (1 + (ifm_shape.W - 1) * param.stride.horizontal) - (ker_shape.W - 1); - param.invalid_vertical = - ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1); - } - else - { - param.invalid_horizontal = 0; - param.invalid_vertical = 0; - } - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - auto ker_alloc = tensors->at(param.ker_index).get(); - - const auto tconv_info = acl_common::asPadStrideInfo(param.padding, param.stride); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>(); - - l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info, - param.invalid_vertical, param.invalid_horizontal); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::SQRTNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::SQRTNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - l->configure(input_alloc->handle(), output_alloc->handle(), act_info); - - fn = std::move(l); - - auto acl_fn = 
asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::LogicalOrNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT1)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input0_index; - model::OperandIndex input1_index; - }; - - Param param; - - param.output_index = output_index; - param.input0_index = input0_index; - param.input1_index = input1_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input0_alloc = tensors->at(param.input0_index).get(); - auto input1_alloc = tensors->at(param.input1_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseOr>(); - - l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::LogicalNotNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::LogicalNotNode::Input::INPUT)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>(); - - l->configure(input_alloc->handle(), output_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::SquaredDifferenceNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(model::operation::SquaredDifferenceNode::Input::LHS)}; - const auto rhs_index{node.getInputs().at(model::operation::SquaredDifferenceNode::Input::RHS)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex lhs_index; - model::OperandIndex rhs_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.lhs_index = lhs_index; - param.rhs_index = rhs_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto lhs_alloc = tensors->at(param.lhs_index).get(); - auto rhs_alloc = tensors->at(param.rhs_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); - - l->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::TopKV2Node &node) 
-{ - const auto outputValues_index{ - node.getOutputs().at(model::operation::TopKV2Node::Output::OUTPUT_VALUES)}; - const auto outputIndices_index{ - node.getOutputs().at(model::operation::TopKV2Node::Output::OUTPUT_INDICES)}; - - const auto inputData_index{node.getInputs().at(model::operation::TopKV2Node::Input::INPUT)}; - const auto k_index{node.param().k_index}; - - // Currently, we only support the vector input. - assert(_ctx.at(inputData_index).shape().rank() == 1 || - _ctx.at(inputData_index).shape().rank() == 2); - - const int32_t k = _ctx.at(k_index).asScalar(); - - // Construct operation parameters - struct Param - { - model::OperandIndex outputValues_index; - model::OperandIndex outputIndices_index; - - model::OperandIndex inputData_index; - int32_t k; - }; - - Param param; - - param.outputValues_index = outputValues_index; - param.outputIndices_index = outputIndices_index; - param.inputData_index = inputData_index; - param.k = k; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto values_alloc = tensors->at(param.outputValues_index).get(); - auto indices_alloc = tensors->at(param.outputIndices_index).get(); - auto input_alloc = tensors->at(param.inputData_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>(); - - l->configure(input_alloc->handle(), param.k, values_alloc->handle(), indices_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::GatherNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - - const auto ifm_index{node.getInputs().at(model::operation::GatherNode::Input::INPUT)}; - const auto indices_index{node.getInputs().at(model::operation::GatherNode::Input::INDICES)}; - - const auto axis_index{node.param().axis_index}; - - const auto ifm_shape = _ctx.at(ifm_index).shape(); - - const int32_t axis_value = static_cast(_ctx.at(axis_index).asScalar()); - const int axis = - ::neurun::backend::acl_common::ToARMComputeAxis(ifm_shape.rank(), axis_value).value(); - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - model::OperandIndex indices_index; - - int32_t axis; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - param.indices_index = indices_index; - - param.axis = axis; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - auto indices_alloc = tensors->at(param.indices_index).get(); - auto acl_layout = ofm_alloc->handle()->info()->data_layout(); - UNUSED_RELEASE(acl_layout); - - // NOTE The frontend layout and backend layout must be the same for this operation. - // If not the same, we have to add a stage(?) to perform permutation of output tensor. It - // is not not efficient even if it works well. If so, it would be better to set the - // layout of these backend tensors to the same layout. - // There is also one thing we have to think about. This operation depends on the layout of - // a model. For example, if a model in NHWC has this operation as output rank == 4, indices - // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W - // and C are not sequential in NCHW. 
So the backend in NCHW cannot handle this case. - // TODO Remove this workaround - // It is a workaround how to set the layout of these backend tensors to the layout of the - // frontend when creating them - // TODO Supports front-end in NCHW - // TODO Change the layout of frontend and backend to be the same - // assert(::arm_compute::DataLayout::NHWC == acl_layout); - assert(acl_layout == ifm_alloc->handle()->info()->data_layout()); - assert(acl_layout == indices_alloc->handle()->info()->data_layout()); - - std::unique_ptr<::arm_compute::IFunction> fn; - // TODO Change to CLGather - auto l = nnfw::cpp14::make_unique<::arm_compute::misc::GenericGather>(); - - l->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), param.axis); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::NegNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::NegNode::Input::INPUT)}; - - // Construct operation parameters - struct Param - { - model::OperandIndex ofm_index; - model::OperandIndex ifm_index; - }; - - Param param; - - param.ofm_index = ofm_index; - param.ifm_index = ifm_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto ofm_alloc = tensors->at(param.ofm_index).get(); - auto ifm_alloc = tensors->at(param.ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLNeg>(); - - l->configure(ifm_alloc->handle(), ofm_alloc->handle()); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::AbsNode &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(model::operation::AbsNode::Input::INPUT)}; - - struct Param - { - model::OperandIndex output_index; - model::OperandIndex input_index; - }; - - Param param; - - param.output_index = output_index; - param.input_index = input_index; - - auto tensors = _tensor_builder; - - returnStage([tensors, param](IExecutionBuilder &builder) { - auto output_alloc = tensors->at(param.output_index).get(); - auto input_alloc = tensors->at(param.input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); - - l->configure(input_alloc->handle(), output_alloc->handle(), act_info); - - fn = std::move(l); - - auto acl_fn = asAclFunction(std::move(fn)); - - builder.append(std::move(acl_fn)); - }); -} - -void StageGenerator::visit(const model::operation::ArgMaxNode &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(model::operation::ArgMaxNode::Input::INPUT)}; - const auto axis_index{node.param().axis_index}; - - auto ifm_shape = _ctx.at(ifm_index).shape(); - auto ofm_shape = _ctx.at(ofm_index).shape(); - auto axis_shape = _ctx.at(axis_index).shape(); - - assert(_ctx.at(axis_index).isConstant()); - // Axis dimension is always 1. 
-  assert(axis_shape.rank() == 1);
-  assert((ifm_shape.rank() - 1) == ofm_shape.rank());
-
-  std::vector<uint32_t> l_axis;
-  const int axis_size = axis_shape.num_elements();
-  auto axis_base = _ctx.at(axis_index).data().base();
-  // TODO Should support axis size > 1.
-  assert(axis_size == 1);
-  // axis is tensor with 1 dimension - always a vector.
-  assert(axis_base != nullptr);
-
-  for (int32_t n = 0; n < axis_size; ++n)
-  {
-    int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
-    if (axis_value < 0)
-    {
-      axis_value += ifm_shape.rank();
-    }
-    l_axis.push_back(acl_common::ToARMComputeAxis(ifm_shape.rank(), axis_value).value());
-  }
-
-  // Construct operation parameters
-  struct Param
-  {
-    model::OperandIndex ofm_index;
-    model::OperandIndex ifm_index;
-    std::vector<uint32_t> axis;
-    uint32_t ifm_rank;
-  };
-
-  Param param;
-
-  param.ofm_index = ofm_index;
-  param.ifm_index = ifm_index;
-  param.axis = l_axis;
-  param.ifm_rank = ifm_shape.rank();
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(param.ofm_index).get();
-    auto ifm_alloc = tensors->at(param.ifm_index).get();
-    auto axis = param.axis;
-    // TODO Support NCHW frontend
-    // TODO Change the layout of frontend and backend to be the same
-    auto acl_layout = ifm_alloc->handle()->info()->data_layout();
-    if (acl_layout == ::arm_compute::DataLayout::NCHW && param.ifm_rank == 4)
-    {
-      // CWHN -> WHCN
-      uint32_t permutation[4] = {2, 0, 1, 3};
-      for (size_t i = 0; i < axis.size(); ++i)
-      {
-        axis[i] = permutation[axis[i]];
-      }
-    }
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLArgOperation>();
-
-    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), axis, ::arm_compute::ArgOperation::MAX);
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::DequantizeNode &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(model::operation::DequantizeNode::Input::INPUT)};
-
-  // Construct operation parameters
-  struct Param
-  {
-    model::OperandIndex output_index;
-    model::OperandIndex input_index;
-  };
-
-  Param param;
-
-  param.output_index = output_index;
-  param.input_index = input_index;
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto output_alloc = tensors->at(param.output_index).get();
-    auto input_alloc = tensors->at(param.input_index).get();
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
-
-    l->configure(input_alloc->handle(), output_alloc->handle());
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::MeanNode &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(model::operation::MeanNode::Input::INPUT)};
-
-  const auto axis_index{node.param().axis_index};
-  const auto keep_dims_index{node.param().keep_dims_index};
-
-  const int keep_dims = _ctx.at(keep_dims_index).asScalar<int32_t>();
-
-  const auto ifm_shape = _ctx.at(ifm_index).shape();
-
-  std::vector<uint32_t> axis;
-  {
-    const auto ifm_rank = ifm_shape.rank();
-    const auto axis_shape = _ctx.at(axis_index).shape();
-    switch (axis_shape.rank())
-    {
-      case 0: // scalar
-      {
-        int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
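-        // A negative axis counts from the last dimension, so normalize it with the input rank
-        // before converting it to an ACL axis.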
-        if (axis_value < 0)
-        {
-          axis_value += ifm_rank;
-        }
-        axis.emplace_back(
-            ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value());
-        break;
-      }
-      case 1: // vector
-      {
-        const auto axis_base = _ctx.at(axis_index).data().base();
-        const int axis_size = axis_shape.num_elements();
-
-        // If axis's data does not exist as constant values and can be gotten as input data, we have
-        // to find a way to infer output shape when sinking output.
-        assert(axis_base != nullptr);
-        for (int32_t n = 0; n < axis_size; ++n)
-        {
-          int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
-          if (axis_value < 0)
-          {
-            axis_value += ifm_rank;
-          }
-          axis.emplace_back(
-              ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value());
-        }
-        break;
-      }
-      default:
-        throw std::runtime_error("Not supported");
-        break;
-    }
-  }
-
-  struct Param
-  {
-    model::OperandIndex ofm_index;
-    model::OperandIndex ifm_index;
-    bool keep_dims;
-    std::vector<uint32_t> axis;
-    uint32_t ifm_rank;
-  };
-
-  Param param;
-
-  param.ofm_index = ofm_index;
-  param.ifm_index = ifm_index;
-  param.keep_dims = keep_dims > 0 ? true : false;
-  param.axis = axis;
-  param.ifm_rank = ifm_shape.rank();
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(param.ofm_index).get();
-    auto ifm_alloc = tensors->at(param.ifm_index).get();
-    std::set<uint32_t> axis;
-    // TODO Support NCHW frontend
-    // TODO Change the layout of frontend and backend to be the same
-    auto acl_layout = ifm_alloc->handle()->info()->data_layout();
-    // CWHN -> WHCN
-    uint32_t permutation[4] = {2, 0, 1, 3};
-    for (size_t i = 0; i < param.axis.size(); ++i)
-    {
-      if (acl_layout == ::arm_compute::DataLayout::NCHW && param.ifm_rank == 4)
-      {
-        axis.insert(permutation[param.axis[i]]);
-      }
-      else
-      {
-        axis.insert(param.axis[i]);
-      }
-    }
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    // NOTE CLReduceMean has a bug that does not support NHWC layout
-    //      CLReduceMean intermediate tensors are always NCHW layout
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
-
-    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), axis,
-                 ::arm_compute::ReduceOperation::MEAN);
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::LocalResponseNormalizationNode &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{
-      node.getInputs().at(model::operation::LocalResponseNormalizationNode::Input::INPUT)};
-  const auto radius_index{node.param().radius_index};
-  const auto bias_index{node.param().bias_index};
-  const auto alpha_index{node.param().alpha_index};
-  const auto beta_index{node.param().beta_index};
-
-  // Construct operation parameters
-  struct Param
-  {
-    model::OperandIndex ofm_index;
-    model::OperandIndex ifm_index;
-
-    int32_t radius;
-    float bias;
-    float alpha;
-    float beta;
-  };
-
-  Param param;
-
-  param.ofm_index = ofm_index;
-  param.ifm_index = ifm_index;
-
-  param.radius = _ctx.at(radius_index).asScalar<int32_t>();
-  param.alpha = _ctx.at(alpha_index).asScalar<float>();
-  param.beta = _ctx.at(beta_index).asScalar<float>();
-  param.bias = _ctx.at(bias_index).asScalar<float>();
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(param.ofm_index).get();
-    auto ifm_alloc = tensors->at(param.ifm_index).get();
-
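-    // ACL's NormalizationLayerInfo takes the full normalization window size rather than the
-    // one-sided radius used by the frontend, hence radius * 2 + 1 below.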
-    const auto norm_info =
-        ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
-                                              param.radius * 2 + 1, param.alpha,
-                                              param.beta, param.bias, false);
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
-
-    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::DepthToSpaceNode &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(model::operation::DepthToSpaceNode::Input::INPUT)};
-  const auto block_size_index{node.param().block_size_index};
-
-  int32_t block_size = _ctx.at(block_size_index).asScalar<int32_t>();
-  assert(block_size > 0);
-
-  // Construct operation parameters
-  struct Param
-  {
-    model::OperandIndex output_index;
-    model::OperandIndex input_index;
-    int32_t block_size;
-  };
-
-  Param param;
-
-  param.output_index = output_index;
-  param.input_index = input_index;
-  param.block_size = block_size;
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto output_alloc = tensors->at(param.output_index).get();
-    auto input_alloc = tensors->at(param.input_index).get();
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>();
-
-    l->configure(input_alloc->handle(), output_alloc->handle(), param.block_size);
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::ReduceMinNode &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(model::operation::ReduceMinNode::Input::INPUT)};
-  const auto axis_index{node.param().axis_index};
-
-  auto ifm_shape = _ctx.at(ifm_index).shape();
-  auto ofm_shape = _ctx.at(ofm_index).shape();
-  auto axis_shape = _ctx.at(axis_index).shape();
-
-  std::vector<uint32_t> axis;
-  {
-    const auto ifm_rank = ifm_shape.rank();
-    switch (axis_shape.rank())
-    {
-      case 0: // scalar
-      {
-        int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
-        if (axis_value < 0)
-        {
-          axis_value += ifm_rank;
-        }
-        axis.emplace_back(
-            ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value());
-        break;
-      }
-      case 1: // vector
-      {
-        const auto axis_base = _ctx.at(axis_index).data().base();
-        const int axis_size = axis_shape.num_elements();
-
-        // If axis's data does not exist as constant values and can be gotten as input data, we have
-        // to find a way to infer output shape when sinking output.
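-        // Each element of the constant axis vector is normalized (negative values wrap around the
-        // input rank) and converted to the corresponding ACL axis, as in the scalar case above.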
-        assert(axis_base != nullptr);
-        for (int32_t n = 0; n < axis_size; ++n)
-        {
-          int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
-          if (axis_value < 0)
-          {
-            axis_value += ifm_rank;
-          }
-          axis.emplace_back(
-              ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value());
-        }
-        break;
-      }
-      default:
-        throw std::runtime_error("Not supported");
-        break;
-    }
-  }
-
-  // Construct operation parameters
-  struct Param
-  {
-    model::OperandIndex ofm_index;
-    model::OperandIndex ifm_index;
-    std::vector<uint32_t> axis;
-    uint32_t ifm_rank;
-  };
-
-  Param param;
-
-  param.ofm_index = ofm_index;
-  param.ifm_index = ifm_index;
-  param.axis = axis;
-  param.ifm_rank = ifm_shape.rank();
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto ofm_alloc = tensors->at(param.ofm_index).get();
-    auto ifm_alloc = tensors->at(param.ifm_index).get();
-    std::set<uint32_t> axis;
-    // TODO Support NCHW frontend
-    // TODO Change the layout of frontend and backend to be the same
-    auto acl_layout = ifm_alloc->handle()->info()->data_layout();
-    // CWHN -> WHCN
-    uint32_t permutation[4] = {2, 0, 1, 3};
-    for (size_t i = 0; i < param.axis.size(); ++i)
-    {
-      if (acl_layout == ::arm_compute::DataLayout::NCHW && param.ifm_rank == 4)
-      {
-        axis.insert(permutation[param.axis[i]]);
-      }
-      else
-      {
-        axis.insert(param.axis[i]);
-      }
-    }
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
-
-    l->configure(ifm_alloc->handle(), ofm_alloc->handle(), axis,
-                 ::arm_compute::ReduceOperation::MIN);
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::SplitNode &node)
-{
-  const auto input_index{node.getInputs().at(model::operation::SplitNode::Input::INPUT)};
-  const auto axis_index{node.param().axis_index};
-  const auto num_of_splits_index{node.param().num_of_splits_index};
-
-  assert(_ctx.at(num_of_splits_index).asScalar<uint32_t>() == node.getOutputs().size());
-
-  const auto ifm_rank = _ctx.at(input_index).shape().rank();
-
-  struct Param
-  {
-    model::OperandIndex ifm_index;
-    std::vector<model::OperandIndex> output_indexes;
-    int32_t axis;
-    uint32_t ifm_rank;
-  };
-
-  Param param;
-  param.ifm_index = input_index;
-  param.axis = _ctx.at(axis_index).asScalar<int32_t>();
-  if (param.axis < 0)
-    param.axis += ifm_rank;
-  param.axis = acl_common::ToARMComputeAxis(ifm_rank, param.axis).value();
-  param.ifm_rank = ifm_rank;
-
-  for (const auto &e : node.getOutputs())
-    param.output_indexes.emplace_back(e);
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto ifm_alloc = tensors->at(param.ifm_index).get();
-    std::vector<arm_compute::ICLTensor *> output_allocs;
-    for (auto ofm_ind : param.output_indexes)
-    {
-      output_allocs.emplace_back(tensors->at(ofm_ind).get()->handle());
-    }
-    auto axis = param.axis;
-    auto acl_layout = ifm_alloc->handle()->info()->data_layout();
-
-    if (acl_layout == ::arm_compute::DataLayout::NCHW && param.ifm_rank == 4)
-    {
-      // CWHN -> WHCN
-      uint32_t permutation[4] = {2, 0, 1, 3};
-      axis = permutation[axis];
-    }
-
-    std::unique_ptr<::arm_compute::IFunction> fn;
-
-    // TODO Support NCHW frontend
-    // TODO Change the layout of frontend and backend to be the same
-    auto l = nnfw::cpp14::make_unique<::arm_compute::CLSplit>();
-
-    l->configure(ifm_alloc->handle(), output_allocs, axis);
-
-    fn = std::move(l);
-
-    auto acl_fn = asAclFunction(std::move(fn));
-
-    builder.append(std::move(acl_fn));
-  });
-}
-
-void StageGenerator::visit(const model::operation::UnpackNode &node)
-{
-  const auto input_index{node.getInputs().at(model::operation::UnpackNode::Input::INPUT)};
-  const auto axis{node.param().axis};
-
-  const auto input_rank = _ctx.at(input_index).shape().rank();
-
-  struct Param
-  {
-    model::OperandIndex input_index;
-    std::vector<model::OperandIndex> output_indexes;
-    int32_t axis;
-  };
-
-  Param param;
-  param.input_index = input_index;
-  param.axis = axis;
-  if (param.axis < 0)
-    param.axis += input_rank;
-  param.axis = acl_common::ToARMComputeAxis(input_rank, param.axis).value();
-
-  for (const auto &output_index : node.getOutputs())
-    param.output_indexes.emplace_back(output_index);
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto input = tensors->at(param.input_index).get()->handle();
-    std::vector<arm_compute::ICLTensor *> outputs;
-    for (const auto output_index : param.output_indexes)
-    {
-      outputs.emplace_back(tensors->at(output_index)->handle());
-    }
-
-    int axis = param.axis;
-    if (input->info()->num_dimensions() == 4 &&
-        input->info()->data_layout() == ::arm_compute::DataLayout::NCHW)
-    {
-      // CWHN -> WHCN
-      const int permutation[4] = {2, 0, 1, 3};
-      axis = permutation[axis];
-    }
-
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>();
-
-    fn->configure(input, outputs, axis);
-
-    builder.append(asAclFunction(std::move(fn)));
-  });
-}
-
-void StageGenerator::visit(const model::operation::PadNode &node)
-{
-  const auto input_index{node.getInputs().at(model::operation::PadNode::Input::INPUT)};
-  const auto pad_index{node.getInputs().at(model::operation::PadNode::Input::PAD)};
-  const auto output_index{node.getOutputs().at(0)};
-
-  struct Param
-  {
-    model::OperandIndex input_index;
-    model::OperandIndex output_index;
-    ::arm_compute::PixelValue pixel_value;
-    ::arm_compute::PaddingList padding_list;
-  };
-
-  assert(_ctx.at(pad_index).isConstant());
-
-  Param param;
-  param.input_index = input_index;
-  param.output_index = output_index;
-
-  auto rank = _ctx.at(pad_index).shape().dim(0);
-  auto pad_base = _ctx.at(pad_index).data().base();
-  param.padding_list.resize(rank);
-  for (int32_t n = 0; n < rank; ++n)
-  {
-    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
-    auto axis = acl_common::ToARMComputeAxis(rank, n).value();
-
-    param.padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
-  }
-
-  auto input_type = _ctx.at(input_index).typeInfo();
-  auto data_type = acl_common::asDataType(input_type.type());
-  auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
-  param.pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
-
-  auto tensors = _tensor_builder;
-
-  returnStage([tensors, param](IExecutionBuilder &builder) {
-    auto input = tensors->at(param.input_index).get()->handle();
-    auto output = tensors->at(param.output_index).get()->handle();
-
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayer>();
-    fn->configure(input, output, param.padding_list, param.pixel_value);
-
-    builder.append(asAclFunction(std::move(fn)));
-  });
-}
+void StageGenerator::visit(const model::operation::PadNode &) {}
 } // namespace acl_cl
 } // namespace backend
-- 
2.7.4