[mir_caffe] Switch to binary elementwise operations (#6409)
author: Сергей Баранников/AI Tools Lab /SRR/Engineer/삼성전자 <s.barannikov@samsung.com>
Fri, 9 Aug 2019 09:20:32 +0000 (12:20 +0300)
committer: Alexander Efimov/AI Tools Lab/./Samsung Electronics <a.efimov@samsung.com>
Fri, 9 Aug 2019 09:20:32 +0000 (12:20 +0300)
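Switch the Caffe importer from the vector-input ElementwiseOp to the new binary
elementwise operations (AddOp, MulOp, MaxOp), and remove the createAdd/createMul
helpers in favor of direct createOp calls.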

Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
compiler/mir-caffe-importer/caffe_op_creator.cpp
compiler/mir-caffe-importer/caffe_op_creator.h

index 91b135f..f6f6ab9 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "caffe_op_creator.h"
 
+#include "mir/ops/AddOp.h"
 #include "mir/ops/BatchNormOp.h"
 #include "mir/ops/CappedReluOp.h"
 #include "mir/ops/ConcatOp.h"
 #include "mir/ops/Deconv2DOp.h"
 #include "mir/ops/DepthwiseConv2DOp.h"
 #include "mir/ops/DropoutOp.h"
-#include "mir/ops/ElementwiseOp.h"
 #include "mir/ops/EluOp.h"
 #include "mir/ops/FullyConnectedOp.h"
 #include "mir/ops/GatherOp.h"
 #include "mir/ops/LeakyReluOp.h"
+#include "mir/ops/MaxOp.h"
+#include "mir/ops/MulOp.h"
 #include "mir/ops/PoolOp.h"
 #include "mir/ops/ReluOp.h"
 #include "mir/ops/ReshapeOp.h"
@@ -127,24 +129,6 @@ mir::Operation::Output *CaffeOpCreator::convertMIRToCaffe(mir::Operation::Output
   return transpose->getOutput(0);
 }
 
-mir::Operation::Output *CaffeOpCreator::createAdd(const std::string &name,
-                                                  mir::Operation::Output *arg1,
-                                                  mir::Operation::Output *arg2)
-{
-  std::vector<mir::Operation::Output *> inputs{arg1, arg2};
-  auto op = createOp<ops::ElementwiseOp>(name, inputs, ops::ElementwiseOp::OpType::add);
-  return op->getOutput(0);
-}
-
-mir::Operation::Output *CaffeOpCreator::createMul(const std::string &name,
-                                                  mir::Operation::Output *arg1,
-                                                  mir::Operation::Output *arg2)
-{
-  std::vector<mir::Operation::Output *> inputs{arg1, arg2};
-  auto op = createOp<ops::ElementwiseOp>(name, inputs, ops::ElementwiseOp::OpType::mul);
-  return op->getOutput(0);
-}
-
 /// @brief Split arg into @p num_parts equal parts along @p axis axis.
 std::vector<mir::Operation::Output *> CaffeOpCreator::createSplit(mir::Operation::Output *arg,
                                                                   int32_t num_parts, int32_t axis)
@@ -362,7 +346,7 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer,
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createAdd(layer.name() + ".bias", result, bias);
+    result = createOp<ops::AddOp>(layer.name() + ".bias", result, bias)->getOutput(0);
   }
 
   return {convertMIRToCaffe(result)};
@@ -395,7 +379,7 @@ CaffeOpCreator::convertDeconvolution(const caffe::LayerParameter &layer,
   if (opts.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createAdd(layer.name() + ".bias", result, bias);
+    result = createOp<ops::AddOp>(layer.name() + ".bias", result, bias)->getOutput(0);
   }
 
   return {convertMIRToCaffe(result)};
@@ -418,7 +402,7 @@ CaffeOpCreator::convertInnerProduct(const LayerParameter &layer,
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createAdd(layer.name() + ".bias", result, bias);
+    result = createOp<ops::AddOp>(layer.name() + ".bias", result, bias)->getOutput(0);
   }
 
   return {result};
@@ -624,13 +608,14 @@ CaffeOpCreator::convertScale(const caffe::LayerParameter &layer,
 {
   const auto &params = layer.scale_param();
   auto scale = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(0)))->getOutput(0);
-  auto result = createMul(layer.name(), convertCaffeToMIR(inputs[0]), scale);
+  auto result =
+      createOp<ops::MulOp>(layer.name(), convertCaffeToMIR(inputs[0]), scale)->getOutput(0);
 
   // Add the bias, if any.
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createAdd(layer.name() + ".bias", result, bias);
+    result = createOp<ops::AddOp>(layer.name() + ".bias", result, bias)->getOutput(0);
   }
 
   return {convertMIRToCaffe(result)};
@@ -667,7 +652,8 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer,
   for (Index idx : ShapeRange(bias_data.getShape()))
     bias_data.at(idx) *= -scale_factor;
   auto mean = createOp<ops::ConstantOp>("", mean_weights)->getOutput(0);
-  auto result = createAdd(layer.name() + ".bias", convertCaffeToMIR(inputs[0]), mean);
+  auto result = createOp<ops::AddOp>(layer.name() + ".bias", convertCaffeToMIR(inputs[0]), mean)
+                    ->getOutput(0);
 
   // create scale argument from variance:
   // multiply elements of variance by scaleFactor and
@@ -677,7 +663,7 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer,
   for (Index idx : ShapeRange(scale_data.getShape()))
     scale_data.at(idx) = 1.0f / std::sqrt(scale_data.at(idx) * scale_factor + eps);
   auto variance = createOp<ops::ConstantOp>("", variance_weights)->getOutput(0);
-  result = createMul(layer.name() + ".scale", result, variance);
+  result = createOp<ops::MulOp>(layer.name() + ".scale", result, variance)->getOutput(0);
   return {convertMIRToCaffe(result)};
 }
 
@@ -712,7 +698,7 @@ CaffeOpCreator::convertEmbed(const caffe::LayerParameter &layer,
   if (params.bias_term())
   {
     auto bias = createOp<ops::ConstantOp>("", convertBlob(layer.blobs(1)))->getOutput(0);
-    result = createAdd(layer.name() + ".bias", result, bias);
+    result = createOp<ops::AddOp>(layer.name() + ".bias", result, bias)->getOutput(0);
   }
 
   return {result};
@@ -738,54 +724,57 @@ std::vector<mir::Operation::Output *>
 CaffeOpCreator::convertEltwise(const caffe::LayerParameter &layer,
                                const std::vector<mir::Operation::Output *> &inputs)
 {
-  auto &opts = layer.eltwise_param();
-  ops::ElementwiseOp::OpType optype;
-  std::vector<mir::Operation::Output *> input_tensors;
-  switch (opts.operation())
+  auto &params = layer.eltwise_param();
+
+  mir::Operation::Output *result;
+  switch (params.operation())
   {
     case EltwiseParameter_EltwiseOp_PROD:
-      optype = ops::ElementwiseOp::OpType::mul;
-      for (auto &i : inputs)
-        input_tensors.push_back(i);
+    {
+      result = createOp<ops::MulOp>("", inputs[0], inputs[1])->getOutput(0);
+      for (int i = 2; i < layer.bottom_size(); ++i)
+      {
+        result = createOp<ops::MulOp>("", result, inputs[i])->getOutput(0);
+      }
       break;
+    }
     case EltwiseParameter_EltwiseOp_SUM:
-      optype = ops::ElementwiseOp::OpType::add;
-      if (!opts.coeff().empty())
+    {
+      std::vector<mir::Operation::Output *> scaled_inputs = inputs;
+      if (params.coeff_size() > 0)
       {
-        assert(opts.coeff().size() == static_cast<int>(inputs.size()));
-        for (int i = 0; i < opts.coeff().size(); i++)
+        assert(params.coeff_size() == layer.bottom_size());
+        for (int i = 0; i < layer.bottom_size(); i++)
         {
-          if (opts.coeff().Get(i) != 1.0f)
-          {
-            TensorVariant coeff_tensor(mir::DataType::FLOAT32, Shape{1}, &opts.coeff().Get(i));
-            auto coeff_const = createOp<ops::ConstantOp>(layer.name() + "_const", coeff_tensor);
-            std::vector<mir::Operation::Output *> mul_inputs;
-            mul_inputs.push_back(coeff_const->getOutput(0));
-            mul_inputs.push_back(inputs[i]);
-            auto mul = createOp<ops::ElementwiseOp>(layer.name() + "_mul", mul_inputs,
-                                                    ops::ElementwiseOp::OpType::mul);
-            input_tensors.push_back(mul->getOutput(0));
-          }
-          else
+          if (params.coeff(i) != 1.0f)
           {
-            input_tensors.push_back(inputs[i]);
+            const float coeff_val = params.coeff(i);
+            TensorVariant coeff_tensor(mir::DataType::FLOAT32, Shape{}, &coeff_val);
+            auto coeff_const = createOp<ops::ConstantOp>("", coeff_tensor)->getOutput(0);
+            scaled_inputs[i] = createOp<ops::MulOp>("", coeff_const, inputs[i])->getOutput(0);
           }
         }
       }
-      else
+      result = createOp<ops::AddOp>("", scaled_inputs[0], scaled_inputs[1])->getOutput(0);
+      for (int i = 2; i < layer.bottom_size(); ++i)
       {
-        for (auto &i : inputs)
-          input_tensors.push_back(i);
+        result = createOp<ops::AddOp>("", result, scaled_inputs[i])->getOutput(0);
       }
       break;
+    }
     case EltwiseParameter_EltwiseOp_MAX:
-      optype = ops::ElementwiseOp::OpType::max;
-      for (auto &i : inputs)
-        input_tensors.push_back(i);
+    {
+      result = createOp<ops::MaxOp>("", inputs[0], inputs[1])->getOutput(0);
+      for (int i = 2; i < layer.bottom_size(); ++i)
+      {
+        result = createOp<ops::MaxOp>("", result, inputs[i])->getOutput(0);
+      }
       break;
+    }
+    default:
+      throw std::runtime_error("Unknown element-wise operation.");
   }
-  auto elementwise = createOp<ops::ElementwiseOp>(layer.name(), input_tensors, optype);
-  return {elementwise->getOutput(0)};
+  return {result};
 }
 
 std::vector<mir::Operation::Output *>
@@ -897,7 +886,7 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
   auto h_t = createOp<ops::ConstantOp>("", zero_tensor)->getOutput(0);
 
   auto x_xw = createFullyConnected(x, xw, 2);
-  auto x_xw_b = createAdd("", x_xw, xb);
+  auto x_xw_b = createOp<ops::AddOp>("", x_xw, xb)->getOutput(0);
 
   // Split input and continuation tensors into seq_length slices.
   std::vector<mir::Operation::Output *> x_xw_b_slices = createSplit(x_xw_b, seq_length, 0);
@@ -906,12 +895,12 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
 
   for (int32_t t = 0; t < seq_length; t++)
   {
-    auto c_cont_t = createMul("", c_t, cont_slices[t]);
-    auto h_cont_t = createMul("", h_t, cont_slices[t]);
+    auto c_cont_t = createOp<ops::MulOp>("", c_t, cont_slices[t])->getOutput(0);
+    auto h_cont_t = createOp<ops::MulOp>("", h_t, cont_slices[t])->getOutput(0);
 
     auto x_xw_b_t = x_xw_b_slices[t];
     auto h_hw_t = createFullyConnected(h_cont_t, hw, 2);
-    auto activation_inputs_concat = createAdd("", x_xw_b_t, h_hw_t);
+    auto activation_inputs_concat = createOp<ops::AddOp>("", x_xw_b_t, h_hw_t)->getOutput(0);
     auto activation_inputs = createSplit(activation_inputs_concat, 4, 2);
 
     auto i_t = createOp<ops::SigmoidOp>("", activation_inputs[0])->getOutput(0);
@@ -919,8 +908,10 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer,
     auto o_t = createOp<ops::SigmoidOp>("", activation_inputs[2])->getOutput(0);
     auto g_t = createOp<ops::TanhOp>("", activation_inputs[3])->getOutput(0);
 
-    c_t = createAdd("", createMul("", c_cont_t, f_t), createMul("", i_t, g_t));
-    h_t = createMul("", createOp<ops::TanhOp>("", c_t)->getOutput(0), o_t);
+    c_t = createOp<ops::AddOp>("", createOp<ops::MulOp>("", c_cont_t, f_t)->getOutput(0),
+                               createOp<ops::MulOp>("", i_t, g_t)->getOutput(0))
+              ->getOutput(0);
+    h_t = createOp<ops::MulOp>("", createOp<ops::TanhOp>("", c_t)->getOutput(0), o_t)->getOutput(0);
 
     h_slices[t] = h_t;
   }
index 5152c4f..7a3abce 100644 (file)
@@ -131,12 +131,6 @@ private:
 
   mir::Operation::Output *convertMIRToCaffe(mir::Operation::Output *arg);
 
-  mir::Operation::Output *createAdd(const std::string &name, mir::Operation::Output *arg1,
-                                    mir::Operation::Output *arg2);
-
-  mir::Operation::Output *createMul(const std::string &name, mir::Operation::Output *arg1,
-                                    mir::Operation::Output *arg2);
-
   std::vector<mir::Operation::Output *> createSplit(mir::Operation::Output *arg, int32_t num_parts,
                                                     int32_t axis);