From 67b4b9729254517c65a372297a3182972b58d6c4 Mon Sep 17 00:00:00 2001
From: SungJin Choi / Motion Control Lab (SR) / Principal Engineer / Samsung Electronics
Date: Wed, 11 Jul 2018 11:39:45 +0900
Subject: [PATCH] Add explicit padding to dconv (#1937)

This commit adds explicit padding to dconv.

Signed-off-by: SungJin Choi
---
 runtimes/pure_arm_compute/src/compilation.cc             | 125 +++++++++++++++++++++
 .../src/internal/op/DepthwiseConv2D.cc                   |  45 ++++++++
 .../src/internal/op/DepthwiseConv2D.h                    |  49 ++++++++
 runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h  |   1 +
 runtimes/pure_arm_compute/src/model.cc                   |  14 ++-
 5 files changed, 230 insertions(+), 4 deletions(-)

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 631747c..d71135c 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -329,6 +329,7 @@ public:
   void visit(const ::internal::tflite::op::Conv2D::implicit::Node &node) override;
   void visit(const ::internal::tflite::op::Conv2D::Explicit::Node &node) override;
   void visit(const ::internal::tflite::op::DepthwiseConv2D::implicit::Node &node) override;
+  void visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Node &node) override;
   void visit(const ::internal::tflite::op::Dequantize::Node &node) override;
   void visit(const ::internal::tflite::op::MaxPool2D::implicit::Node &node) override;
   void visit(const ::internal::tflite::op::AvgPool2D::implicit::Node &node) override;
@@ -1096,6 +1097,130 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::implicit::Nod
   _builder.addStage(stage);
 }
 
+void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Node &node)
+{
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+  const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+  const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+  const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+  const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+  const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+  const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+  const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+  const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+  const ::internal::tflite::operand::Index multipler_index{node.param().multipler_index};
+  const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+  const auto bias_size = _ctx.at(bias_index).shape().asVector();
+
+  auto multiplier = _ctx.at(multipler_index).asScalar<int32_t>();
+
+  // Multiplier in CLDepthwiseConvolutionLayer is supported after ARMCompute 18.05
+  assert(multiplier == 1);
+  assert(ker_shape.C == bias_size);
+  assert(ker_shape.C == ifm_shape.C * multiplier);
+
+  const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+  const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+  const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+  const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+  Stride stride;
+
+  stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
+  stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
+
+  // TODO Should move to the place where the operand is handled, if it is possible.
+  // Set Shape Constraints and TensorInfo
+  _builder.addShapeConstr(ofm_index, asTensorInfo(ofm_shape, _ctx.at(ofm_index).type()));
+  _builder.addShapeConstr(ifm_index, asTensorInfo(ifm_shape, _ctx.at(ifm_index).type()));
+  // NOTE DepthwiseConv2D kernel is of shape [1, KER_W, KER_H, IFM_C * MULTIPLIER]
+  _builder.addShapeConstr(ker_index, asTensorInfo(ker_shape, _ctx.at(ker_index).type()));
+  _builder.addShapeConstr(bias_index, asTensorInfo(bias_size, _ctx.at(bias_index).type()));
+
+  // Construct operation parameters
+  struct Param
+  {
+    int ofm_index;
+    int ifm_index;
+    int ker_index;
+    int bias_index;
+
+    Padding padding;
+    Stride stride;
+
+    int multipler;
+    FuseCode activation;
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+  param.ker_index = ker_index.asInt();
+  param.bias_index = bias_index.asInt();
+
+  param.stride = stride;
+
+  param.padding.left = padding_left;
+  param.padding.right = padding_right;
+  param.padding.top = padding_top;
+  param.padding.bottom = padding_bottom;
+
+  param.multipler = multiplier;
+  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+  VERBOSE(DepthwiseConv2D) << "OFM_C: " << ofm_shape.C << std::endl;
+  VERBOSE(DepthwiseConv2D) << "OFM_H: " << ofm_shape.H << std::endl;
+  VERBOSE(DepthwiseConv2D) << "OFM_W: " << ofm_shape.W << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "IFM_C: " << ifm_shape.C << std::endl;
+  VERBOSE(DepthwiseConv2D) << "IFM_H: " << ifm_shape.H << std::endl;
+  VERBOSE(DepthwiseConv2D) << "IFM_W: " << ifm_shape.W << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "KER_C: " << ker_shape.C << std::endl;
+  VERBOSE(DepthwiseConv2D) << "KER_H: " << ker_shape.H << std::endl;
+  VERBOSE(DepthwiseConv2D) << "KER_W: " << ker_shape.W << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "STRIDE_H: " << param.stride.vertical << std::endl;
+  VERBOSE(DepthwiseConv2D) << "STRIDE_W: " << param.stride.horizontal << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "ACTIVATION: " << param.activation << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "PAD(T): " << param.padding.top << std::endl;
+  VERBOSE(DepthwiseConv2D) << "PAD(B): " << param.padding.bottom << std::endl;
+  VERBOSE(DepthwiseConv2D) << "PAD(L): " << param.padding.left << std::endl;
+  VERBOSE(DepthwiseConv2D) << "PAD(R): " << param.padding.right << std::endl;
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+    auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+    auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+    auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+    const auto conv_info = asPadStrideInfo(param.padding, param.stride);
+
+    auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+
+    fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info);
+
+    builder.append("DepthwiseConv2D", std::move(fn));
+
+    ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+  };
+
+  _builder.addStage(stage);
+}
+
 void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node)
 {
   const ::internal::tflite::operand::Index output_index{node.param().output_index};
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
index be80050..c998810 100644
--- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
@@ -11,6 +11,13 @@ namespace op
 {
 namespace DepthwiseConv2D
 {
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
 namespace implicit
 {
 
@@ -30,6 +37,44 @@ namespace op
 {
 namespace DepthwiseConv2D
 {
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+             const uint32_t *outputs)
+{
+  assert(inputCount == 11 && outputCount == 1);
+
+  ofm_index = outputs[0];
+
+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Kernel Tensor Index
+  // 2 -> Bias Tensor Index
+  // 3 -> Padding_left index
+  // 4 -> Padding_right index
+  // 5 -> Padding_top index
+  // 6 -> Padding_bottom index
+  // 7 -> Stride (width) Index
+  // 8 -> Stride (height) Index
+  // 9 -> Depthwise Multiplier
+  // 10 -> Activation Index
+  ifm_index = inputs[0];
+  ker_index = inputs[1];
+  bias_index = inputs[2];
+  padding_left_index = inputs[3];
+  padding_right_index = inputs[4];
+  padding_top_index = inputs[5];
+  padding_bottom_index = inputs[6];
+  hstride_index = inputs[7];
+  vstride_index = inputs[8];
+  multipler_index = inputs[9];
+  activation_index = inputs[10];
+}
+
+} // namespace Explicit
+
 namespace implicit
 {
 
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
index c184b39..d6f86a6 100644
--- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
@@ -13,6 +13,55 @@ namespace op
 {
 namespace DepthwiseConv2D
 {
+namespace Explicit
+{
+
+struct Param
+{
+  int32_t ofm_index;
+
+  int32_t ifm_index;
+  int32_t ker_index;
+  int32_t bias_index;
+
+  int32_t hstride_index;
+  int32_t vstride_index;
+
+  int32_t padding_left_index;
+  int32_t padding_right_index;
+  int32_t padding_top_index;
+  int32_t padding_bottom_index;
+
+  int32_t multipler_index;
+  int32_t activation_index;
+
+  Param() = default;
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+  Node(const Param &param) : _param(param)
+  {
+    // DO NOTHING
+  }
+
+public:
+  virtual ~Node() = default;
+
+public:
+  const Param &param(void) const { return _param; }
+
+public:
+  void accept(NodeVisitor &&) const override;
+
+private:
+  const Param _param;
+};
+
+} // namespace Explicit
+
 namespace implicit
 {
 
diff --git a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
index f9a4660..c9f9c90 100644
--- a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
+++ b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
@@ -43,6 +43,7 @@ struct NodeVisitor
   virtual void visit(const Conv2D::implicit::Node &) = 0;
   virtual void visit(const Conv2D::Explicit::Node &) = 0;
   virtual void visit(const DepthwiseConv2D::implicit::Node &) = 0;
+  virtual void visit(const DepthwiseConv2D::Explicit::Node &) = 0;
   virtual void visit(const Dequantize::Node &) = 0;
   virtual void visit(const MaxPool2D::implicit::Node &) = 0;
   virtual void visit(const AvgPool2D::implicit::Node &) = 0;
diff --git a/runtimes/pure_arm_compute/src/model.cc b/runtimes/pure_arm_compute/src/model.cc
index d12bd1e..bea2617 100644
--- a/runtimes/pure_arm_compute/src/model.cc
+++ b/runtimes/pure_arm_compute/src/model.cc
@@ -168,10 +168,10 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
     }
     case ANEURALNETWORKS_DEPTHWISE_CONV_2D:
     {
-      // inputCount is either 8 or 10 acccording to NN API specification.
+      // inputCount is either 8 or 11 according to NN API specification.
       // - Padding is implicit when inputCount is 8
-      // - Padding is explicit when inputCount is 10
-      assert(inputCount == 8 || inputCount == 10);
+      // - Padding is explicit when inputCount is 11
+      assert(inputCount == 8 || inputCount == 11);
       assert(outputCount == 1);
 
       if (inputCount == 8)
@@ -186,7 +186,13 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
       }
       else
       {
-        throw std::runtime_error{"Explicit padding is not supported, yet"};
+        using internal::tflite::op::DepthwiseConv2D::Explicit::Param;
+        using internal::tflite::op::DepthwiseConv2D::Explicit::Node;
+
+        // Add 'operations'
+        auto &operations = model->deref().operations();
+
+        operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
       }
 
       break;
-- 
2.7.4
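
Usage note (not part of the patch): the sketch below shows how an NN API client might exercise the new explicit-padding path that this change enables. The operand indices (0..11) and the helper name add_explicit_dconv are illustrative assumptions; only the 11-element input ordering mirrors the Explicit::Param constructor added above.

// Illustrative only: operands 0..11 are assumed to have been added to the model
// beforehand (ANeuralNetworksModel_addOperand / setOperandValue) in the order
// documented in the comments below.
#include <NeuralNetworks.h>

int add_explicit_dconv(ANeuralNetworksModel *model)
{
  const uint32_t inputs[11] = {
      0,  // 0  -> IFM (input) tensor
      1,  // 1  -> Kernel tensor
      2,  // 2  -> Bias tensor
      3,  // 3  -> Padding_left scalar
      4,  // 4  -> Padding_right scalar
      5,  // 5  -> Padding_top scalar
      6,  // 6  -> Padding_bottom scalar
      7,  // 7  -> Stride (width) scalar
      8,  // 8  -> Stride (height) scalar
      9,  // 9  -> Depthwise multiplier scalar (this runtime asserts it is 1)
      10, // 10 -> Fused activation (FuseCode) scalar
  };
  const uint32_t outputs[1] = {11}; // OFM (output) tensor

  // With 11 inputs the runtime now builds a DepthwiseConv2D::Explicit::Node
  // instead of throwing "Explicit padding is not supported, yet".
  return ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_DEPTHWISE_CONV_2D,
                                           11, inputs, 1, outputs);
}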