From 75c9ba313fbfe959ad3c456d11089064fad66968 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EB=B0=95=EC=A2=85=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Staff=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Tue, 12 Jun 2018 16:53:43 +0900 Subject: [PATCH] [Pure CL] Support DepthwiseConv2D (#1661) This commit introduces DepthwiseConv2D operation support in pure CL runtime. Signed-off-by: Jonghyun Park --- runtimes/pure_arm_compute/src/compilation.cc | 121 +++++++++++++++++++++ .../src/internal/op/DepthwiseConv2D.cc | 67 ++++++++++++ .../src/internal/op/DepthwiseConv2D.h | 65 +++++++++++ .../pure_arm_compute/src/internal/op/NodeVisitor.h | 2 + runtimes/pure_arm_compute/src/model.cc | 25 +++++ 5 files changed, 280 insertions(+) create mode 100644 runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc create mode 100644 runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index cb887d2..4bf4d96 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include "internal/arm_compute/Cast.h" #include "internal/arm_compute/kernel/View.h" @@ -301,6 +302,7 @@ public: void visit(const ::internal::tflite::op::Mul::Node &node) override; void visit(const ::internal::tflite::op::Div::Node &node) override; void visit(const ::internal::tflite::op::Conv2D::implicit::Node &node) override; + void visit(const ::internal::tflite::op::DepthwiseConv2D::implicit::Node &node) override; void visit(const ::internal::tflite::op::MaxPool2D::implicit::Node &node) override; void visit(const ::internal::tflite::op::AvgPool2D::implicit::Node &node) override; void visit(const ::internal::tflite::op::Concat::Node &node) override; @@ -798,6 +800,125 @@ void Planner::visit(const ::internal::tflite::op::Conv2D::implicit::Node &node) 
_builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::implicit::Node &node) +{ + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index ker_index{node.param().ker_index}; + const ::internal::tflite::operand::Index bias_index{node.param().bias_index}; + + const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index}; + const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index}; + + const ::internal::tflite::operand::Index padding_index{node.param().padding_index}; + const ::internal::tflite::operand::Index multipler_index{node.param().multipler_index}; + const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(); + const auto bias_size = _ctx.at(bias_index).shape().asVector(); + + auto multiplier = _ctx.at(multipler_index).asScala(); + + // Multiplier in CLDepthwiseConvolutionLayer is supported after ARMCompute 18.05 + assert(multiplier == 1); + assert(ker_shape.C == bias_size); + assert(ker_shape.C == ifm_shape.C * multiplier); + + const PaddingCode padding_type = + static_cast<PaddingCode>(_ctx.at(padding_index).asScala()); + + Stride stride; + + stride.vertical = _ctx.at(vstride_index).asScala(); + stride.horizontal = _ctx.at(hstride_index).asScala(); + + assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || + (ANEURALNETWORKS_PADDING_VALID == padding_type)); + + // TODO Should move to the place where the operand is handled, if it is possible. 
+ // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(ofm_index, asTensorInfo(ofm_shape, _ctx.at(ofm_index).type())); + _builder.addShapeConstr(ifm_index, asTensorInfo(ifm_shape, _ctx.at(ifm_index).type())); + // NOTE DepthwiseConv2D kernel is of shape [1, KER_W, KER_H, IFM_C * MULTIPLIER] + _builder.addShapeConstr(ker_index, asTensorInfo(ker_shape, _ctx.at(ker_index).type())); + _builder.addShapeConstr(bias_index, asTensorInfo(bias_size, _ctx.at(bias_index).type())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + int ker_index; + int bias_index; + + Padding padding; + Stride stride; + + int multipler; + FuseCode activation; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.ker_index = ker_index.asInt(); + param.bias_index = bias_index.asInt(); + + param.stride = stride; + param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) + ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H) + : valid_padding(); + + param.multipler = multiplier; + param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScala()); + + VERBOSE(DepthwiseConv2D) << "OFM_C: " << ofm_shape.C << std::endl; + VERBOSE(DepthwiseConv2D) << "OFM_H: " << ofm_shape.H << std::endl; + VERBOSE(DepthwiseConv2D) << "OFM_W: " << ofm_shape.W << std::endl; + + VERBOSE(DepthwiseConv2D) << "IFM_C: " << ifm_shape.C << std::endl; + VERBOSE(DepthwiseConv2D) << "IFM_H: " << ifm_shape.H << std::endl; + VERBOSE(DepthwiseConv2D) << "IFM_W: " << ifm_shape.W << std::endl; + + VERBOSE(DepthwiseConv2D) << "KER_C: " << ker_shape.C << std::endl; + VERBOSE(DepthwiseConv2D) << "KER_H: " << ker_shape.H << std::endl; + VERBOSE(DepthwiseConv2D) << "KER_W: " << ker_shape.W << std::endl; + + VERBOSE(DepthwiseConv2D) << "STRIDE_H: " << param.stride.vertical << std::endl; + VERBOSE(DepthwiseConv2D) << "STRIDE_W: " << param.stride.horizontal << std::endl; + + VERBOSE(DepthwiseConv2D) << 
"ACTIVATION: " << param.activation << std::endl; + + VERBOSE(DepthwiseConv2D) << "PAD(T): " << param.padding.top << std::endl; + VERBOSE(DepthwiseConv2D) << "PAD(B): " << param.padding.bottom << std::endl; + VERBOSE(DepthwiseConv2D) << "PAD(L): " << param.padding.left << std::endl; + VERBOSE(DepthwiseConv2D) << "PAD(R): " << param.padding.right << std::endl; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index}); + auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index}); + + const auto conv_info = asPadStringInfo(param.padding, param.stride); + + auto fn = make_layer<::arm_compute::CLDepthwiseConvolutionLayer>(); + + fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info); + + builder.append(std::move(fn)); + + ActivationBuilder{builder}.append(param.activation, ofm_alloc); + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::MaxPool2D::implicit::Node &node) { const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc new file mode 100644 index 0000000..be80050 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc @@ -0,0 +1,67 @@ +#include "internal/op/DepthwiseConv2D.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthwiseConv2D +{ +namespace implicit +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace implicit +} // namespace DepthwiseConv2D +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ 
+namespace tflite +{ +namespace op +{ +namespace DepthwiseConv2D +{ +namespace implicit +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 8 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> IFM Tensor Index + // 1 -> Kernel Tensor Index + // 2 -> Bias Tensor Index + // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 4 -> Stride (width) Index + // 5 -> Stride (height) Index + // 6 -> Depthwise Multiplier + // 7 -> Activation Index + ifm_index = inputs[0]; + ker_index = inputs[1]; + bias_index = inputs[2]; + padding_index = inputs[3]; + hstride_index = inputs[4]; + vstride_index = inputs[5]; + multipler_index = inputs[6]; + activation_index = inputs[7]; +} + +} // namespace implicit +} // namespace DepthwiseConv2D +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h new file mode 100644 index 0000000..c184b39 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h @@ -0,0 +1,65 @@ +#ifndef __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ +#define __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthwiseConv2D +{ +namespace implicit +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t ker_index; + int32_t bias_index; + + int32_t hstride_index; + int32_t vstride_index; + + int32_t padding_index; + int32_t multipler_index; + int32_t activation_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param &param) : _param(param) + { + // DO NOTHING + } + 
+public: + virtual ~Node() = default; + +public: + const Param &param(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace implicit +} // namespace DepthwiseConv2D +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h index 69d82a3..90a1f51 100644 --- a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h +++ b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h @@ -6,6 +6,7 @@ #include "internal/op/Mul.h" #include "internal/op/Div.h" #include "internal/op/Conv2D.h" +#include "internal/op/DepthwiseConv2D.h" #include "internal/op/MaxPool2D.h" #include "internal/op/AvgPool2D.h" #include "internal/op/Concat.h" @@ -35,6 +36,7 @@ struct NodeVisitor virtual void visit(const Mul::Node &) = 0; virtual void visit(const Div::Node &) = 0; virtual void visit(const Conv2D::implicit::Node &) = 0; + virtual void visit(const DepthwiseConv2D::implicit::Node &) = 0; virtual void visit(const MaxPool2D::implicit::Node &) = 0; virtual void visit(const AvgPool2D::implicit::Node &) = 0; virtual void visit(const Concat::Node &) = 0; diff --git a/runtimes/pure_arm_compute/src/model.cc b/runtimes/pure_arm_compute/src/model.cc index 9dafc33..c9f768e 100644 --- a/runtimes/pure_arm_compute/src/model.cc +++ b/runtimes/pure_arm_compute/src/model.cc @@ -160,6 +160,31 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_DEPTHWISE_CONV_2D: + { + // inputCount is either 8 or 10 according to NN API specification. 
+ // - Padding is implicit when inputCount is 8 + // - Padding is explicit when inputCount is 10 + assert(inputCount == 8 || inputCount == 10); + assert(outputCount == 1); + + if (inputCount == 8) + { + using internal::tflite::op::DepthwiseConv2D::implicit::Param; + using internal::tflite::op::DepthwiseConv2D::implicit::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + } + else + { + throw std::runtime_error{"Explicit padding is not supported, yet"}; + } + + break; + } case ANEURALNETWORKS_MAX_POOL_2D: { // inputCount is either 7 or 9 according to NN API specification. -- 2.7.4