[moco-tf] Support DepthwiseConv2dNative in FuseBinaryIntoPreceding (#6490)

author 남궁석/On-Device Lab(SR)/Engineer/삼성전자 <sk.namkoong@samsung.com>

Mon, 12 Aug 2019 07:27:49 +0000 (16:27 +0900)

committer 박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>

Mon, 12 Aug 2019 07:27:49 +0000 (16:27 +0900)
author 남궁석/On-Device Lab(SR)/Engineer/삼성전자 <sk.namkoong@samsung.com>
Mon, 12 Aug 2019 07:27:49 +0000 (16:27 +0900)
committer 박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Mon, 12 Aug 2019 07:27:49 +0000 (16:27 +0900)
diff --git a/compiler/moco-tf/src/Transforms/FuseBinaryIntoPreceding.cpp b/compiler/moco-tf/src/Transforms/FuseBinaryIntoPreceding.cpp

index 642ba40..be5535e 100644 (file)
--- a/compiler/moco-tf/src/Transforms/FuseBinaryIntoPreceding.cpp
+++ b/compiler/moco-tf/src/Transforms/FuseBinaryIntoPreceding.cpp
@@ -23,6 +23,7 @@
  #include "IR/TFBiasAdd.h"
  #include "IR/TFConst.h"
  #include "IR/TFConv2D.h"
+#include "IR/TFDepthwiseConv2dNative.h"
  #include "IR/TFMul.h"
  
  #include <loco.h>
@@ -41,7 +42,7 @@ namespace
  enum class FuseType
  {
    Conv2D,
-  // TODO Support DepthwiseConv2D
+  DepthwiseConv2D,
    // TODO Support FullyConnected
  };
  
@@ -152,6 +153,61 @@ moco::tf::TFConst *create_kernel_from_fuse_mulparam<FuseType::Conv2D>(loco::Grap
    return ker_fused;
  }
  
+/**
+ * @brief Fuse a per-output-channel Mul constant into a DepthwiseConv2dNative kernel (HWIM)
+ * @return New FLOAT32 TFConst shaped like ker, each element scaled by its output channel's Mul
+ */
+template <>
+moco::tf::TFConst *create_kernel_from_fuse_mulparam<FuseType::DepthwiseConv2D>(
+    loco::Graph *graph, moco::tf::TFConst *ker, moco::tf::TFConst *mulparam)
+{
+  auto ker_shape_inf = ker->annot<moco::tf::ShapeInferenceData>();
+  assert(ker_shape_inf);
+  auto ker_shape = ker_shape_inf->tensor_shape();
+
+  auto mulparam_shape_inf = mulparam->annot<moco::tf::ShapeInferenceData>();
+  assert(mulparam_shape_inf != nullptr);
+  auto mulparam_shape = mulparam_shape_inf->tensor_shape();
+
+  // create new ker_fused with same size of ker (ker itself is left unmodified)
+  auto ker_fused = graph->nodes()->create<moco::tf::TFConst>();
+
+  assert(ker_shape.rank() == 4);
+  assert(mulparam_shape.rank() == 1);
+  assert(ker_shape.dim(2).value() * ker_shape.dim(3).value() == mulparam_shape.dim(0).value());
+
+  ker_fused->dtype(loco::DataType::FLOAT32);
+  copy_shape(ker, ker_fused);
+  auto ker_num_elements = ker->size<loco::DataType::FLOAT32>();
+  ker_fused->size<loco::DataType::FLOAT32>(ker_num_elements);
+
+  // TensorFlow DepthwiseConv2DNative Kernel has HWIM format
+  // Kernel element (in_ch, ker_ch) feeds output channel 'in_ch * ker_multiplier + ker_ch'
+  const uint32_t ker_height = ker_shape.dim(0).value();
+  const uint32_t ker_width = ker_shape.dim(1).value();
+  const uint32_t ker_input = ker_shape.dim(2).value();
+  const uint32_t ker_multiplier = ker_shape.dim(3).value();
+
+  for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y)
+  {
+    for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x)
+    {
+      for (uint32_t in_ch = 0; in_ch < ker_input; ++in_ch)
+      {
+        uint32_t num_items = ((ker_y * ker_width + ker_x) * ker_input + in_ch) * ker_multiplier;
+        for (uint32_t ker_ch = 0; ker_ch < ker_multiplier; ++ker_ch)
+        {
+          auto mulparam_v = mulparam->at<loco::DataType::FLOAT32>(in_ch * ker_multiplier + ker_ch);
+          auto ker_v = ker->at<loco::DataType::FLOAT32>(num_items + ker_ch);
+          ker_fused->at<loco::DataType::FLOAT32>(num_items + ker_ch) = ker_v * mulparam_v;
+        }
+      }
+    }
+  }
+
+  return ker_fused;
+}
+
  // Will be deprecated
  moco::tf::TFConst *create_kernal_from_fuse_mulparam(loco::Graph *graph, moco::tf::TFConst *ker,
                                                      moco::tf::TFConst *mulparam)
@@ -297,7 +353,6 @@ bool fuse_to_preceding(loco::Graph *graph, moco::tf::TFMul *node)
  
    moco::tf::TFConst *mulparam = nullptr;
    moco::tf::TFNode *precedingOp = nullptr;
-  // TODO support DepthWiseConv2D
    // TODO support FullyConnected
  
    if (xc != nullptr)
@@ -316,6 +371,9 @@ bool fuse_to_preceding(loco::Graph *graph, moco::tf::TFMul *node)
    moco::tf::TFNode *fused_node = nullptr;
    if (auto conv2d = dynamic_cast<moco::tf::TFConv2D *>(precedingOp))
      fused_node = fused_conv_node<FuseType::Conv2D, moco::tf::TFConv2D>(graph, mulparam, conv2d);
+  else if (auto dw_conv2d = dynamic_cast<moco::tf::TFDepthwiseConv2dNative *>(precedingOp))
+    fused_node = fused_conv_node<FuseType::DepthwiseConv2D, moco::tf::TFDepthwiseConv2dNative>(
+        graph, mulparam, dw_conv2d);
  
    // Not ready yet
    if (fused_node == nullptr)
@@ -420,7 +478,6 @@ bool fuse_to_preceding(loco::Graph *graph, moco::tf::TFAdd *node)
    moco::tf::TFConst *addparam = nullptr;
    moco::tf::TFNode *precedingOp = nullptr;
    moco::tf::TFBiasAdd *biasadd = nullptr;
-  // TODO support DepthWiseConv2D
    // TODO support FullyConnected
  
    if (xc != nullptr)
@@ -454,6 +511,10 @@ bool fuse_to_preceding(loco::Graph *graph, moco::tf::TFAdd *node)
    {
      biasadd = create_biasadd_node<moco::tf::TFConv2D>(graph, addparam, conv2d);
    }
+  else if (auto dw_conv2d = dynamic_cast<moco::tf::TFDepthwiseConv2dNative *>(precedingOp))
+  {
+    biasadd = create_biasadd_node<moco::tf::TFDepthwiseConv2dNative>(graph, addparam, dw_conv2d);
+  }
    else if (auto old_bias_add = dynamic_cast<moco::tf::TFBiasAdd *>(precedingOp))
    {
      biasadd = old_bias_add;
author	남궁석/On-Device Lab(SR)/Engineer/삼성전자 <sk.namkoong@samsung.com>
	Mon, 12 Aug 2019 07:27:49 +0000 (16:27 +0900)
committer	박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
	Mon, 12 Aug 2019 07:27:49 +0000 (16:27 +0900)