From 67b4b9729254517c65a372297a3182972b58d6c4 Mon Sep 17 00:00:00 2001
From: SungJin Choi / Motion Control Lab (SR) / Principal Engineer / Samsung Electronics
Date: Wed, 11 Jul 2018 11:39:45 +0900
Subject: [PATCH] Add explicit padding to dconv (#1937)

This commit adds explicit padding to dconv.

Signed-off-by: SungJin Choi
---
 runtimes/pure_arm_compute/src/compilation.cc             | 125 +++++++++++++++++++++
 .../src/internal/op/DepthwiseConv2D.cc                   |  45 ++++++++
 .../src/internal/op/DepthwiseConv2D.h                    |  49 ++++++++
 runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h  |   1 +
 runtimes/pure_arm_compute/src/model.cc                   |  14 ++-
 5 files changed, 230 insertions(+), 4 deletions(-)

diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 631747c..d71135c 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -329,6 +329,7 @@ public:
   void visit(const ::internal::tflite::op::Conv2D::implicit::Node &node) override;
   void visit(const ::internal::tflite::op::Conv2D::Explicit::Node &node) override;
   void visit(const ::internal::tflite::op::DepthwiseConv2D::implicit::Node &node) override;
+  void visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Node &node) override;
   void visit(const ::internal::tflite::op::Dequantize::Node &node) override;
   void visit(const ::internal::tflite::op::MaxPool2D::implicit::Node &node) override;
   void visit(const ::internal::tflite::op::AvgPool2D::implicit::Node &node) override;
@@ -1096,6 +1097,130 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::implicit::Nod
   _builder.addStage(stage);
 }
 
+void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Node &node)
+{
+  const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+  const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+  const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+  const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+  const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+  const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+  const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+  const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+  const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+  const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+  const ::internal::tflite::operand::Index multipler_index{node.param().multipler_index};
+  const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+  const auto bias_size = _ctx.at(bias_index).shape().asVector();
+
+  auto multiplier = _ctx.at(multipler_index).asScalar<int32_t>();
+
+  // Multiplier in CLDepthwiseConvolutionLayer is supported after ARMCompute 18.05
+  assert(multiplier == 1);
+  assert(ker_shape.C == bias_size);
+  assert(ker_shape.C == ifm_shape.C * multiplier);
+
+  const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+  const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+  const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+  const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+  Stride stride;
+
+  stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
+  stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
+
+  // TODO Should move to the place where the operand is handled, if it is possible.
+  // Set Shape Constraints and TensorInfo
+  _builder.addShapeConstr(ofm_index, asTensorInfo(ofm_shape, _ctx.at(ofm_index).type()));
+  _builder.addShapeConstr(ifm_index, asTensorInfo(ifm_shape, _ctx.at(ifm_index).type()));
+  // NOTE DepthwiseConv2D kernel is of shape [1, KER_W, KER_H, IFM_C * MULTIPLIER]
+  _builder.addShapeConstr(ker_index, asTensorInfo(ker_shape, _ctx.at(ker_index).type()));
+  _builder.addShapeConstr(bias_index, asTensorInfo(bias_size, _ctx.at(bias_index).type()));
+
+  // Construct operation parameters
+  struct Param
+  {
+    int ofm_index;
+    int ifm_index;
+    int ker_index;
+    int bias_index;
+
+    Padding padding;
+    Stride stride;
+
+    int multipler;
+    FuseCode activation;
+  };
+
+  Param param;
+
+  param.ofm_index = ofm_index.asInt();
+  param.ifm_index = ifm_index.asInt();
+  param.ker_index = ker_index.asInt();
+  param.bias_index = bias_index.asInt();
+
+  param.stride = stride;
+
+  param.padding.left = padding_left;
+  param.padding.right = padding_right;
+  param.padding.top = padding_top;
+  param.padding.bottom = padding_bottom;
+
+  param.multipler = multiplier;
+  param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+  VERBOSE(DepthwiseConv2D) << "OFM_C: " << ofm_shape.C << std::endl;
+  VERBOSE(DepthwiseConv2D) << "OFM_H: " << ofm_shape.H << std::endl;
+  VERBOSE(DepthwiseConv2D) << "OFM_W: " << ofm_shape.W << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "IFM_C: " << ifm_shape.C << std::endl;
+  VERBOSE(DepthwiseConv2D) << "IFM_H: " << ifm_shape.H << std::endl;
+  VERBOSE(DepthwiseConv2D) << "IFM_W: " << ifm_shape.W << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "KER_C: " << ker_shape.C << std::endl;
+  VERBOSE(DepthwiseConv2D) << "KER_H: " << ker_shape.H << std::endl;
+  VERBOSE(DepthwiseConv2D) << "KER_W: " << ker_shape.W << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "STRIDE_H: " << param.stride.vertical << std::endl;
+  VERBOSE(DepthwiseConv2D) << "STRIDE_W: " << param.stride.horizontal << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "ACTIVATION: " << param.activation << std::endl;
+
+  VERBOSE(DepthwiseConv2D) << "PAD(T): " << param.padding.top << std::endl;
+  VERBOSE(DepthwiseConv2D) << "PAD(B): " << param.padding.bottom << std::endl;
+  VERBOSE(DepthwiseConv2D) << "PAD(L): " << param.padding.left << std::endl;
+  VERBOSE(DepthwiseConv2D) << "PAD(R): " << param.padding.right << std::endl;
+
+  auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+    auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+    auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+    auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+    auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+    const auto conv_info = asPadStrideInfo(param.padding, param.stride);
+
+    auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+
+    fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info);
+
+    builder.append("DepthwiseConv2D", std::move(fn));
+
+    ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+  };
+
+  _builder.addStage(stage);
+}
+
 void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node)
 {
   const ::internal::tflite::operand::Index output_index{node.param().output_index};
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
index be80050..c998810 100644
--- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
@@ -11,6 +11,13 @@ namespace op
 {
 namespace DepthwiseConv2D
 {
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
 namespace implicit
 {
 
@@ -30,6 +37,44 @@ namespace op
 {
 namespace DepthwiseConv2D
 {
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+             const uint32_t *outputs)
+{
+  assert(inputCount == 11 && outputCount == 1);
+
+  ofm_index = outputs[0];
+
+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Kernel Tensor Index
+  // 2 -> Bias Tensor Index
+  // 3 -> Padding_left index
+  // 4 -> Padding_right index
+  // 5 -> Padding_top index
+  // 6 -> Padding_bottom index
+  // 7 -> Stride (width) Index
+  // 8 -> Stride (height) Index
+  // 9 -> Depthwise Multiplier
+  // 10 -> Activation Index
+  ifm_index = inputs[0];
+  ker_index = inputs[1];
+  bias_index = inputs[2];
+  padding_left_index = inputs[3];
+  padding_right_index = inputs[4];
+  padding_top_index = inputs[5];
+  padding_bottom_index = inputs[6];
+  hstride_index = inputs[7];
+  vstride_index = inputs[8];
+  multipler_index = inputs[9];
+  activation_index = inputs[10];
+}
+
+} // namespace Explicit
+
 namespace implicit
 {
 
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
index c184b39..d6f86a6 100644
--- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
@@ -13,6 +13,55 @@ namespace op
 {
 namespace DepthwiseConv2D
 {
+namespace Explicit
+{
+
+struct Param
+{
+  int32_t ofm_index;
+
+  int32_t ifm_index;
+  int32_t ker_index;
+  int32_t bias_index;
+
+  int32_t hstride_index;
+  int32_t vstride_index;
+
+  int32_t padding_left_index;
+  int32_t padding_right_index;
+  int32_t padding_top_index;
+  int32_t padding_bottom_index;
+
+  int32_t multipler_index;
+  int32_t activation_index;
+
+  Param() = default;
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+  Node(const Param &param) : _param(param)
+  {
+    // DO NOTHING
+  }
+
+public:
+  virtual ~Node() = default;
+
+public:
+  const Param &param(void) const { return _param; }
+
+public:
+  void accept(NodeVisitor &&) const override;
+
+private:
+  const Param _param;
+};
+
+} // namespace Explicit
+
 namespace implicit
 {
 
diff --git a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
index f9a4660..c9f9c90 100644
--- a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
+++ b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
@@ -43,6 +43,7 @@ struct NodeVisitor
   virtual void visit(const Conv2D::implicit::Node &) = 0;
   virtual void visit(const Conv2D::Explicit::Node &) = 0;
   virtual void visit(const DepthwiseConv2D::implicit::Node &) = 0;
+  virtual void visit(const DepthwiseConv2D::Explicit::Node &) = 0;
   virtual void visit(const Dequantize::Node &) = 0;
   virtual void visit(const MaxPool2D::implicit::Node &) = 0;
   virtual void visit(const AvgPool2D::implicit::Node &) = 0;
diff --git a/runtimes/pure_arm_compute/src/model.cc b/runtimes/pure_arm_compute/src/model.cc
index d12bd1e..bea2617 100644
--- a/runtimes/pure_arm_compute/src/model.cc
+++ b/runtimes/pure_arm_compute/src/model.cc
@@ -168,10 +168,10 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
     }
     case ANEURALNETWORKS_DEPTHWISE_CONV_2D:
     {
-      // inputCount is either 8 or 10 acccording to NN API specification.
+      // inputCount is either 8 or 11 according to NN API specification.
       // - Padding is implicit when inputCount is 8
-      // - Padding is explicit when inputCount is 10
-      assert(inputCount == 8 || inputCount == 10);
+      // - Padding is explicit when inputCount is 11
+      assert(inputCount == 8 || inputCount == 11);
       assert(outputCount == 1);
 
       if (inputCount == 8)
@@ -186,7 +186,13 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
       }
       else
       {
-        throw std::runtime_error{"Explicit padding is not supported, yet"};
+        using internal::tflite::op::DepthwiseConv2D::Explicit::Param;
+        using internal::tflite::op::DepthwiseConv2D::Explicit::Node;
+
+        // Add 'operations'
+        auto &operations = model->deref().operations();
+
+        operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
       }
 
       break;
-- 
2.7.4
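
Usage note (not part of the patch): the sketch below shows how an NN API client might exercise the new explicit-padding path that this change enables. The operand indices (0..11) and the helper name add_explicit_dconv are illustrative assumptions; only the 11-element input ordering mirrors the Explicit::Param constructor added above.

// Illustrative only: operands 0..11 are assumed to have been added to the model
// beforehand (ANeuralNetworksModel_addOperand / setOperandValue) in the order
// documented in the comments below.
#include <NeuralNetworks.h>

int add_explicit_dconv(ANeuralNetworksModel *model)
{
  const uint32_t inputs[11] = {
      0,  // 0  -> IFM (input) tensor
      1,  // 1  -> Kernel tensor
      2,  // 2  -> Bias tensor
      3,  // 3  -> Padding_left scalar
      4,  // 4  -> Padding_right scalar
      5,  // 5  -> Padding_top scalar
      6,  // 6  -> Padding_bottom scalar
      7,  // 7  -> Stride (width) scalar
      8,  // 8  -> Stride (height) scalar
      9,  // 9  -> Depthwise multiplier scalar (this runtime asserts it is 1)
      10, // 10 -> Fused activation (FuseCode) scalar
  };
  const uint32_t outputs[1] = {11}; // OFM (output) tensor

  // With 11 inputs the runtime now builds a DepthwiseConv2D::Explicit::Node
  // instead of throwing "Explicit padding is not supported, yet".
  return ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_DEPTHWISE_CONV_2D,
                                           11, inputs, 1, outputs);
}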