From 89ca51722c3b9ddc477b9df54132769c5003e40b Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EB=B0=95=EC=A2=85=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Senior=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Mon, 16 Apr 2018 12:35:59 +0900 Subject: [PATCH] [Pure ACL Runtime] Support 'Concat' operation (#693) This commit introduces partial support for the ANEURALNETWORKS_CONCATENATION operation. Only feature map concat over the depth axis is currently supported. Signed-off-by: Jonghyun Park --- .../bindings/pure_arm_compute/src/compilation.cc | 162 ++++++++++++++++++++- .../pure_arm_compute/src/internal/arm_compute.h | 6 + .../pure_arm_compute/src/internal/op/Concat.cc | 53 +++++++ .../pure_arm_compute/src/internal/op/Concat.h | 56 +++++++ .../pure_arm_compute/src/internal/op/NodeVisitor.h | 2 + .../bindings/pure_arm_compute/src/model.cc | 12 ++ 6 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.cc create mode 100644 tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.h diff --git a/tools/nnapi_bindings/bindings/pure_arm_compute/src/compilation.cc b/tools/nnapi_bindings/bindings/pure_arm_compute/src/compilation.cc index 92eb5b2..82f2376 100644 --- a/tools/nnapi_bindings/bindings/pure_arm_compute/src/compilation.cc +++ b/tools/nnapi_bindings/bindings/pure_arm_compute/src/compilation.cc @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -155,6 +156,10 @@ struct IPlanBuilder virtual void addShapeConstr(const ::internal::tflite::operand::Index &ind, const ::arm_compute::TensorInfo &info) = 0; + virtual void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, + const ::internal::tflite::operand::Index &base, + const ::arm_compute::Coordinates &offset, + const ::arm_compute::TensorShape &shape) = 0; virtual void addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) = 
0; virtual void addStage(const Stage &) = 0; @@ -229,6 +234,7 @@ public: void visit(const ::internal::tflite::op::Conv2D::implicit::Node &node) override; void visit(const ::internal::tflite::op::MaxPool2D::implicit::Node &node) override; void visit(const ::internal::tflite::op::AvgPool2D::implicit::Node &node) override; + void visit(const ::internal::tflite::op::Concat::Node &node) override; private: const ::internal::tflite::operand::Set &_ctx; @@ -549,6 +555,40 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::implicit::Node &nod _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::Concat::Node &node) +{ + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + + // NOTE This implementation assumes that inputs and output are a feature + // TODO Remove this assumption + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + + // NOTE This implementation assumes concat over feature depth + // TODO Remove this assumption + assert(_ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScala() == 3); + + // Set Shape Constraints (for output) + _builder.addShapeConstr(ofm_index, asTensorInfo(ofm_shape)); + + // Set Shape Constraints (for input) + uint32_t depth = 0; + + for (const auto &index : node.param().ifm_indexes) + { + const ::internal::tflite::operand::Index ifm_index{index}; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + + _builder.addSubsumptionConstr(ifm_index, + ofm_index, + ::arm_compute::Coordinates{0, 0, depth, 0}, + asTensorShape(ifm_shape)); + + depth += ifm_shape.C; + } + + // NOTE Concat has no actual operation! 
+} + class AllocationContext final : public IAllocationContext { public: @@ -597,6 +637,13 @@ public: void addShapeConstr(const ::internal::tflite::operand::Index &ind, const ::arm_compute::TensorInfo &info) override; + +public: + void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, + const ::internal::tflite::operand::Index &base, + const ::arm_compute::Coordinates &offset, + const ::arm_compute::TensorShape &shape) override; + public: void addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) override; @@ -611,7 +658,31 @@ private: ::internal::arm_compute::Plan &_plan; private: + struct Subsumption + { + public: + Subsumption(const ::internal::tflite::operand::Index &base, + const ::arm_compute::Coordinates &offset, + const ::arm_compute::TensorShape &shape) + : _base{base}, _offset{offset}, _shape{shape} + { + // DO NOTHING + } + + public: + const ::internal::tflite::operand::Index &base(void) const { return _base; } + const ::arm_compute::Coordinates &offset(void) const { return _offset; } + const ::arm_compute::TensorShape &shape(void) const { return _shape; } + + private: + const ::internal::tflite::operand::Index _base; + const ::arm_compute::Coordinates _offset; + const ::arm_compute::TensorShape _shape; + }; + +private: std::map _tensor_info_ctx; + std::map> _subsumption_ctx; std::map _initializer_ctx; std::vector _stages; }; @@ -622,6 +693,14 @@ void PlanBuilder::addShapeConstr(const ::internal::tflite::operand::Index &ind, _tensor_info_ctx[ind.asInt()] = info; } +void PlanBuilder::addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, + const ::internal::tflite::operand::Index &base, + const ::arm_compute::Coordinates &offset, + const ::arm_compute::TensorShape &shape) +{ + _subsumption_ctx[ind.asInt()] = std::make_shared(base, offset, shape); +} + void PlanBuilder::addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) { @@ -630,21 +709,96 @@ 
void PlanBuilder::addInitializer(const ::internal::tflite::operand::Index &ind, void PlanBuilder::addStage(const Stage &stage) { _stages.emplace_back(stage); } +#include + void PlanBuilder::finalize(void) const { // CLTensor objects to be initialized later std::vector> tensors; - // Create CLTensor - for (auto it = _tensor_info_ctx.begin(); it != _tensor_info_ctx.end(); ++it) + // Create CLTensor & CLSubTensor + auto isAllocated = [this] (int ind) + { + const ::internal::tflite::operand::Index operand_index{ind}; + return _plan.operands().exist(operand_index); + }; + + auto setCLTensor = [&] (int ind) { auto tensor = std::make_shared<::arm_compute::CLTensor>(); - tensor->allocator()->init(it->second); + tensor->allocator()->init(_tensor_info_ctx.at(ind)); // NOTE Do NOT allocate here. allocate should be invoked after configure functions - _plan.operands().set(::internal::tflite::operand::Index{it->first}, tensor); + _plan.operands().set(::internal::tflite::operand::Index{ind}, tensor); tensors.emplace_back(tensor); + }; + + auto setCLSubTensor = [&] (int curr) + { + const auto &sub_info = *(_subsumption_ctx.find(curr)->second); + + auto base_tensor = _plan.operands().at(sub_info.base()).ptr(); + + assert(base_tensor != nullptr); + + auto curr_tensor = std::make_shared<::arm_compute::CLSubTensor>(base_tensor, + sub_info.shape(), + sub_info.offset()); + + _plan.operands().set(::internal::tflite::operand::Index{curr}, curr_tensor); + }; + + for (auto it = _subsumption_ctx.begin(); it != _subsumption_ctx.end(); ++it) + { + std::stack stack; + + stack.push(it->first); + + while (!stack.empty()) + { + const auto curr = stack.top(); + + if (isAllocated(curr)) + { + // Skip if already allocated + stack.pop(); + continue; + } + + auto it_s = _subsumption_ctx.find(curr); + + if (it_s == _subsumption_ctx.end()) + { + setCLTensor(curr); + stack.pop(); + continue; + } + + const auto &sub_info = *(it_s->second); + + if (isAllocated(sub_info.base().asInt())) + { + 
setCLSubTensor(curr); + stack.pop(); + } + else + { + // Allocate base tensor first + stack.push(sub_info.base().asInt()); + } + } + } + + for (auto it = _tensor_info_ctx.begin(); it != _tensor_info_ctx.end(); ++it) + { + if (isAllocated(it->first)) + { + // Skip if already allocated + continue; + } + + setCLTensor(it->first); } // Process Stage diff --git a/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/arm_compute.h b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/arm_compute.h index 6221ac4..51ec812 100644 --- a/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/arm_compute.h +++ b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/arm_compute.h @@ -53,6 +53,12 @@ public: const std::shared_ptr<::arm_compute::ICLTensor> &tensor); public: + bool exist(const ::internal::tflite::operand::Index &ind) const + { + return _objects.find(ind.asInt()) != _objects.end(); + } + +public: const Object &at(const ::internal::tflite::operand::Index &ind) const { return _objects.at(ind.asInt()); diff --git a/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.cc b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.cc new file mode 100644 index 0000000..0108491 --- /dev/null +++ b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.cc @@ -0,0 +1,53 @@ +#include "internal/op/Concat.h" +#include "internal/op/NodeVisitor.h" + +#include + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Concat +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Concat +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Concat +{ + +Param::Param(uint32_t inputCount, const uint32_t* inputs, + uint32_t outputCount, const uint32_t* outputs) +{ + assert(outputCount == 1); + + ofm_index = outputs[0]; + + // When there are N + 1 inputs, each input should be interpreted 
as follows: + // + // [0, N) -> Input tensors + // N -> Axis + axis_index = inputs[inputCount - 1]; + + for (uint32_t n = 0; n < inputCount - 1; ++n) + { + ifm_indexes.emplace_back(inputs[n]); + } +} + +} // namespace Concat +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.h b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.h new file mode 100644 index 0000000..11c87bb --- /dev/null +++ b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/Concat.h @@ -0,0 +1,56 @@ +#ifndef __INTERNAL_OP_CONCAT_H__ +#define __INTERNAL_OP_CONCAT_H__ + +#include "internal/op/Node.h" + +#include +#include + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Concat +{ + +struct Param +{ + int32_t ofm_index; + + std::vector ifm_indexes; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t* inputs, + uint32_t outputCount, const uint32_t* outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param &param) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param &param(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Concat +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_CONCAT_H__ diff --git a/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/NodeVisitor.h b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/NodeVisitor.h index cd2155b..ba7ee3a 100644 --- a/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/NodeVisitor.h +++ b/tools/nnapi_bindings/bindings/pure_arm_compute/src/internal/op/NodeVisitor.h @@ -4,6 +4,7 @@ #include "internal/op/Conv2D.h" #include "internal/op/MaxPool2D.h" #include "internal/op/AvgPool2D.h" +#include "internal/op/Concat.h" namespace 
internal { @@ -19,6 +20,7 @@ struct NodeVisitor virtual void visit(const Conv2D::implicit::Node &) = 0; virtual void visit(const MaxPool2D::implicit::Node &) = 0; virtual void visit(const AvgPool2D::implicit::Node &) = 0; + virtual void visit(const Concat::Node &) = 0; }; } // namespace op diff --git a/tools/nnapi_bindings/bindings/pure_arm_compute/src/model.cc b/tools/nnapi_bindings/bindings/pure_arm_compute/src/model.cc index 853845e..37fb6b8 100644 --- a/tools/nnapi_bindings/bindings/pure_arm_compute/src/model.cc +++ b/tools/nnapi_bindings/bindings/pure_arm_compute/src/model.cc @@ -158,6 +158,18 @@ ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model, break; } + case ANEURALNETWORKS_CONCATENATION: + { + using internal::tflite::op::Concat::Param; + using internal::tflite::op::Concat::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back(Param{inputCount, inputs, outputCount, outputs}); + + break; + } default: throw std::runtime_error{"Not supported operation"}; }; -- 2.7.4