From d9b02bf2ada95fbcf637a21b4e41f0e61c67d578 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=91=D0=B0=D1=80?=
 =?utf8?q?=D0=B0=D0=BD=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2/AI=20Tools=20Lab=20/S?=
 =?utf8?q?RR/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?=
 <s.barannikov@samsung.com>
Date: Tue, 6 Aug 2019 19:53:03 +0300
Subject: [PATCH] [mir_caffe] Replace BiasAdd and Scale with elementwise
 equivalents (#6295)

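`BiasAdd` and `Scale` are restricted versions of the equivalent `Elementwise` ops (`add` and `mul`, respectively) and are going to be removed.
Replace their uses in the Caffe importer with `ElementwiseOp`, and make the `createAdd`/`createMul` helpers take the operation name so that the existing layer-derived names (e.g. `<layer>.bias`, `<layer>.scale`) are preserved.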

Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
---
 compiler/mir-caffe-importer/caffe_op_creator.cpp | 67 +++++++++++++-----------
 compiler/mir-caffe-importer/caffe_op_creator.h   |  6 ++-
 2 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/compiler/mir-caffe-importer/caffe_op_creator.cpp b/compiler/mir-caffe-importer/caffe_op_creator.cpp
index 3b801ab..91b135f 100644
--- a/compiler/mir-caffe-importer/caffe_op_creator.cpp
+++ b/compiler/mir-caffe-importer/caffe_op_creator.cpp
@@ -17,7 +17,6 @@
 #include "caffe_op_creator.h"
 
 #include "mir/ops/BatchNormOp.h"
-#include "mir/ops/BiasAddOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/ConstantOp.h"
@@ -33,7 +32,6 @@
 #include "mir/ops/PoolOp.h"
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
-#include "mir/ops/ScaleOp.h"
 #include "mir/ops/SigmoidOp.h"
 #include "mir/ops/SliceOp.h"
 #include "mir/ops/SoftmaxOp.h"
@@ -129,19 +127,21 @@ mir::Operation::Output *CaffeOpCreator::convertMIRToCaffe(mir::Operation::Output
   return transpose->getOutput(0);
 }
 
-mir::Operation::Output *CaffeOpCreator::createAdd(mir::Operation::Output *arg1,
+mir::Operation::Output *CaffeOpCreator::createAdd(const std::string &name,
+                                                  mir::Operation::Output *arg1,
                                                   mir::Operation::Output *arg2)
 {
   std::vector<mir::Operation::Output *> inputs{arg1, arg2};
-  auto op = createOp<ops::ElementwiseOp>("", inputs, ops::ElementwiseOp::OpType::add);
+  auto op = createOp<ops::ElementwiseOp>(name, inputs, ops::ElementwiseOp::OpType::add);
   return op->getOutput(0);
 }
 
-mir::Operation::Output *CaffeOpCreator::createMul(mir::Operation::Output *arg1,
+mir::Operation::Output *CaffeOpCreator::createMul(const std::string &name,
+                                                  mir::Operation::Output *arg1,
                                                   mir::Operation::Output *arg2)
 {
   std::vector<mir::Operation::Output *> inputs{arg1, arg2};
-  auto op = createOp<ops::ElementwiseOp>("", inputs, ops::ElementwiseOp::OpType::mul);
+  auto op = createOp<ops::ElementwiseOp>(name, inputs, ops::ElementwiseOp::OpType::mul);
   return op->getOutput(0);
 }
 
@@ -329,7 +329,7 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer,
   auto kernel_weights = convertBlob(layer.blobs(0));
   kernel_weights = transposeTensor<2, 3, 1, 0>(kernel_weights);
 
-  Operation *result;
+  Operation::Output *result;
   auto in_group_size = kernel_weights.getShape().dim(2);
   auto out_channels = kernel_weights.getShape().dim(3);
   int32_t num_groups = params.group();
@@ -341,7 +341,8 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer,
     auto transposed_tensor = transposeTensor<0, 1, 3, 2>(kernel_weights);
     auto kernel = createOp<ops::ConstantOp>("", transposed_tensor)->getOutput(0);
     result = createOp<ops::DepthwiseConv2DOp>(layer.name(), convertCaffeToMIR(inputs[0]), kernel,
-                                              strides, padding, padding);
+                                              strides, padding, padding)
+                 ->getOutput(0);
   }
   else
   {
@@ -353,17 +354,18 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer,
     kernel_weights = transposeTensor<3, 0, 1, 2>(kernel_weights);
     auto kernel = createOp<ops::ConstantOp>("", kernel_weights)->getOutput(0);
     result = createOp<ops::Conv2DOp>(layer.name(), convertCaffeToMIR(inputs[0]), kernel, strides,
-                                     padding, padding);
+                                     padding, padding)
+                 ->getOutput(0);
   }
 
   // Add the bias, if any.
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createOp<ops::BiasAddOp>(layer.name() + ".bias", result->getOutput(0), bias);
+    result = createAdd(layer.name() + ".bias", result, bias);
   }
 
-  return {convertMIRToCaffe(result->getOutput(0))};
+  return {convertMIRToCaffe(result)};
 }
 
 std::vector<mir::Operation::Output *>
@@ -386,16 +388,17 @@ CaffeOpCreator::convertDeconvolution(const caffe::LayerParameter &layer,
   }
   auto kernel = createOp<ops::ConstantOp>("", kernel_weights)->getOutput(0);
   auto result = createOp<ops::DeConv2DOp>(layer.name(), convertCaffeToMIR(inputs[0]), kernel,
-                                          strides, padding);
+                                          strides, padding)
+                    ->getOutput(0);
 
   // bias_term is optional (so might not be present) and defaults to true
   if (opts.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createOp<ops::BiasAddOp>(layer.name() + ".bias", result->getOutput(0), bias);
+    result = createAdd(layer.name() + ".bias", result, bias);
   }
 
-  return {convertMIRToCaffe(result->getOutput(0))};
+  return {convertMIRToCaffe(result)};
 }
 
 std::vector<mir::Operation::Output *>
@@ -415,7 +418,7 @@ CaffeOpCreator::convertInnerProduct(const LayerParameter &layer,
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createOp<ops::BiasAddOp>(layer.name() + ".bias", result, bias)->getOutput(0);
+    result = createAdd(layer.name() + ".bias", result, bias);
   }
 
   return {result};
@@ -621,16 +624,16 @@ CaffeOpCreator::convertScale(const caffe::LayerParameter &layer,
 {
   const auto &params = layer.scale_param();
   auto scale = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(0)))->getOutput(0);
-  auto result = createOp<ops::ScaleOp>(layer.name(), convertCaffeToMIR(inputs[0]), scale);
+  auto result = createMul(layer.name(), convertCaffeToMIR(inputs[0]), scale);
 
   // Add the bias, if any.
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createOp<ops::BiasAddOp>(layer.name() + ".bias", result->getOutput(0), bias);
+    result = createAdd(layer.name() + ".bias", result, bias);
   }
 
-  return {convertMIRToCaffe(result->getOutput(0))};
+  return {convertMIRToCaffe(result)};
 }
 
 void CaffeOpCreator::checkBatchNorm(const caffe::LayerParameter &layer,
@@ -658,14 +661,13 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer,
 
   // create bias argument from mean:
   // multiply elements of mean by scaleFactor and get opposite numbers
-  // to subtract mean from input via biasAdd operation
+  // to subtract mean from input via add operation
   auto mean_weights = convertBlob(layer.blobs(0));
   Tensor<float> bias_data(mean_weights);
   for (Index idx : ShapeRange(bias_data.getShape()))
     bias_data.at(idx) *= -scale_factor;
   auto mean = createOp<ops::ConstantOp>("", mean_weights)->getOutput(0);
-  auto result =
-      createOp<ops::BiasAddOp>(layer.name() + ".bias", convertCaffeToMIR(inputs[0]), mean);
+  auto result = createAdd(layer.name() + ".bias", convertCaffeToMIR(inputs[0]), mean);
 
   // create scale argument from variance:
   // multiply elements of variance by scaleFactor and
@@ -675,8 +677,8 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer,
   for (Index idx : ShapeRange(scale_data.getShape()))
     scale_data.at(idx) = 1.0f / std::sqrt(scale_data.at(idx) * scale_factor + eps);
   auto variance = createOp<ops::ConstantOp>("", variance_weights)->getOutput(0);
-  result = createOp<ops::ScaleOp>(layer.name() + ".scale", result->getOutput(0), variance);
-  return {convertMIRToCaffe(result->getOutput(0))};
+  result = createMul(layer.name() + ".scale", result, variance);
+  return {convertMIRToCaffe(result)};
 }
 
 std::vector<mir::Operation::Output *>
@@ -703,16 +705,17 @@ CaffeOpCreator::convertEmbed(const caffe::LayerParameter &layer,
 {
   const auto &params = layer.embed_param();
   auto data = createOp<ops::ConstantOp>(layer.name() + ".weights", convertBlob(layer.blobs(0)));
-  auto result = createOp<ops::GatherOp>(layer.name(), data->getOutput(0), inputs[0], 0);
+  auto result =
+      createOp<ops::GatherOp>(layer.name(), data->getOutput(0), inputs[0], 0)->getOutput(0);
 
   // Add the bias, if any.
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createOp<ops::BiasAddOp>(layer.name() + ".bias", result->getOutput(0), bias);
+    result = createAdd(layer.name() + ".bias", result, bias);
   }
 
-  return {result->getOutput(0)};
+  return {result};
 }
 
 std::vector<mir::Operation::Output *>
@@ -894,7 +897,7 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
   auto h_t = createOp<ops::ConstantOp>("", zero_tensor)->getOutput(0);
 
   auto x_xw = createFullyConnected(x, xw, 2);
-  auto x_xw_b = createOp<ops::BiasAddOp>("", x_xw, xb)->getOutput(0);
+  auto x_xw_b = createAdd("", x_xw, xb);
 
   // Split input and continuation tensors into seq_length slices.
   std::vector<mir::Operation::Output *> x_xw_b_slices = createSplit(x_xw_b, seq_length, 0);
@@ -903,12 +906,12 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
 
   for (int32_t t = 0; t < seq_length; t++)
   {
-    auto c_cont_t = createMul(c_t, cont_slices[t]);
-    auto h_cont_t = createMul(h_t, cont_slices[t]);
+    auto c_cont_t = createMul("", c_t, cont_slices[t]);
+    auto h_cont_t = createMul("", h_t, cont_slices[t]);
 
     auto x_xw_b_t = x_xw_b_slices[t];
     auto h_hw_t = createFullyConnected(h_cont_t, hw, 2);
-    auto activation_inputs_concat = createAdd(x_xw_b_t, h_hw_t);
+    auto activation_inputs_concat = createAdd("", x_xw_b_t, h_hw_t);
     auto activation_inputs = createSplit(activation_inputs_concat, 4, 2);
 
     auto i_t = createOp<ops::SigmoidOp>("", activation_inputs[0])->getOutput(0);
@@ -916,8 +919,8 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
     auto o_t = createOp<ops::SigmoidOp>("", activation_inputs[2])->getOutput(0);
     auto g_t = createOp<ops::TanhOp>("", activation_inputs[3])->getOutput(0);
 
-    c_t = createAdd(createMul(c_cont_t, f_t), createMul(i_t, g_t));
-    h_t = createMul(createOp<ops::TanhOp>("", c_t)->getOutput(0), o_t);
+    c_t = createAdd("", createMul("", c_cont_t, f_t), createMul("", i_t, g_t));
+    h_t = createMul("", createOp<ops::TanhOp>("", c_t)->getOutput(0), o_t);
 
     h_slices[t] = h_t;
   }
diff --git a/compiler/mir-caffe-importer/caffe_op_creator.h b/compiler/mir-caffe-importer/caffe_op_creator.h
index b57d098..5152c4f 100644
--- a/compiler/mir-caffe-importer/caffe_op_creator.h
+++ b/compiler/mir-caffe-importer/caffe_op_creator.h
@@ -131,9 +131,11 @@ private:
 
   mir::Operation::Output *convertMIRToCaffe(mir::Operation::Output *arg);
 
-  mir::Operation::Output *createAdd(mir::Operation::Output *arg1, mir::Operation::Output *arg2);
+  mir::Operation::Output *createAdd(const std::string &name, mir::Operation::Output *arg1,
+                                    mir::Operation::Output *arg2);
 
-  mir::Operation::Output *createMul(mir::Operation::Output *arg1, mir::Operation::Output *arg2);
+  mir::Operation::Output *createMul(const std::string &name, mir::Operation::Output *arg1,
+                                    mir::Operation::Output *arg2);
 
   std::vector<mir::Operation::Output *> createSplit(mir::Operation::Output *arg, int32_t num_parts,
                                                     int32_t axis);
-- 
2.7.4