From d9b02bf2ada95fbcf637a21b4e41f0e61c67d578 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=91=D0=B0=D1=80?= =?utf8?q?=D0=B0=D0=BD=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2/AI=20Tools=20Lab=20/S?= =?utf8?q?RR/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Tue, 6 Aug 2019 19:53:03 +0300 Subject: [PATCH] [mir_caffe] Replace BiasAdd and Scale with elementwise equivalents (#6295) `BiasAdd` and `Scale` are restricted versions of equivalent Elementwise ops and are going to be removed. Signed-off-by: Sergei Barannikov --- compiler/mir-caffe-importer/caffe_op_creator.cpp | 67 +++++++++++++----------- compiler/mir-caffe-importer/caffe_op_creator.h | 6 ++- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/compiler/mir-caffe-importer/caffe_op_creator.cpp b/compiler/mir-caffe-importer/caffe_op_creator.cpp index 3b801ab..91b135f 100644 --- a/compiler/mir-caffe-importer/caffe_op_creator.cpp +++ b/compiler/mir-caffe-importer/caffe_op_creator.cpp @@ -17,7 +17,6 @@ #include "caffe_op_creator.h" #include "mir/ops/BatchNormOp.h" -#include "mir/ops/BiasAddOp.h" #include "mir/ops/CappedReluOp.h" #include "mir/ops/ConcatOp.h" #include "mir/ops/ConstantOp.h" @@ -33,7 +32,6 @@ #include "mir/ops/PoolOp.h" #include "mir/ops/ReluOp.h" #include "mir/ops/ReshapeOp.h" -#include "mir/ops/ScaleOp.h" #include "mir/ops/SigmoidOp.h" #include "mir/ops/SliceOp.h" #include "mir/ops/SoftmaxOp.h" @@ -129,19 +127,21 @@ mir::Operation::Output *CaffeOpCreator::convertMIRToCaffe(mir::Operation::Output return transpose->getOutput(0); } -mir::Operation::Output *CaffeOpCreator::createAdd(mir::Operation::Output *arg1, +mir::Operation::Output *CaffeOpCreator::createAdd(const std::string &name, + mir::Operation::Output *arg1, mir::Operation::Output *arg2) { std::vector inputs{arg1, arg2}; - auto op = createOp("", inputs, ops::ElementwiseOp::OpType::add); + auto op = createOp(name, inputs, ops::ElementwiseOp::OpType::add); return op->getOutput(0); } 
-mir::Operation::Output *CaffeOpCreator::createMul(mir::Operation::Output *arg1, +mir::Operation::Output *CaffeOpCreator::createMul(const std::string &name, + mir::Operation::Output *arg1, mir::Operation::Output *arg2) { std::vector inputs{arg1, arg2}; - auto op = createOp("", inputs, ops::ElementwiseOp::OpType::mul); + auto op = createOp(name, inputs, ops::ElementwiseOp::OpType::mul); return op->getOutput(0); } @@ -329,7 +329,7 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer, auto kernel_weights = convertBlob(layer.blobs(0)); kernel_weights = transposeTensor<2, 3, 1, 0>(kernel_weights); - Operation *result; + Operation::Output *result; auto in_group_size = kernel_weights.getShape().dim(2); auto out_channels = kernel_weights.getShape().dim(3); int32_t num_groups = params.group(); @@ -341,7 +341,8 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer, auto transposed_tensor = transposeTensor<0, 1, 3, 2>(kernel_weights); auto kernel = createOp("", transposed_tensor)->getOutput(0); result = createOp(layer.name(), convertCaffeToMIR(inputs[0]), kernel, - strides, padding, padding); + strides, padding, padding) + ->getOutput(0); } else { @@ -353,17 +354,18 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter &layer, kernel_weights = transposeTensor<3, 0, 1, 2>(kernel_weights); auto kernel = createOp("", kernel_weights)->getOutput(0); result = createOp(layer.name(), convertCaffeToMIR(inputs[0]), kernel, strides, - padding, padding); + padding, padding) + ->getOutput(0); } // Add the bias, if any. 
if (params.bias_term()) { auto bias = createOp("", convertBlob(layer.blobs(1)))->getOutput(0); - result = createOp(layer.name() + ".bias", result->getOutput(0), bias); + result = createAdd(layer.name() + ".bias", result, bias); } - return {convertMIRToCaffe(result->getOutput(0))}; + return {convertMIRToCaffe(result)}; } std::vector @@ -386,16 +388,17 @@ CaffeOpCreator::convertDeconvolution(const caffe::LayerParameter &layer, } auto kernel = createOp("", kernel_weights)->getOutput(0); auto result = createOp(layer.name(), convertCaffeToMIR(inputs[0]), kernel, - strides, padding); + strides, padding) + ->getOutput(0); // bias_term is optional (so might not be present) and defaults to true if (opts.bias_term()) { auto bias = createOp("", convertBlob(layer.blobs(1)))->getOutput(0); - result = createOp(layer.name() + ".bias", result->getOutput(0), bias); + result = createAdd(layer.name() + ".bias", result, bias); } - return {convertMIRToCaffe(result->getOutput(0))}; + return {convertMIRToCaffe(result)}; } std::vector @@ -415,7 +418,7 @@ CaffeOpCreator::convertInnerProduct(const LayerParameter &layer, if (params.bias_term()) { auto bias = createOp("", convertBlob(layer.blobs(1)))->getOutput(0); - result = createOp(layer.name() + ".bias", result, bias)->getOutput(0); + result = createAdd(layer.name() + ".bias", result, bias); } return {result}; @@ -621,16 +624,16 @@ CaffeOpCreator::convertScale(const caffe::LayerParameter &layer, { const auto &params = layer.scale_param(); auto scale = createOp("", convertBlob(layer.blobs(0)))->getOutput(0); - auto result = createOp(layer.name(), convertCaffeToMIR(inputs[0]), scale); + auto result = createMul(layer.name(), convertCaffeToMIR(inputs[0]), scale); // Add the bias, if any. 
if (params.bias_term()) { auto bias = createOp("", convertBlob(layer.blobs(1)))->getOutput(0); - result = createOp(layer.name() + ".bias", result->getOutput(0), bias); + result = createAdd(layer.name() + ".bias", result, bias); } - return {convertMIRToCaffe(result->getOutput(0))}; + return {convertMIRToCaffe(result)}; } void CaffeOpCreator::checkBatchNorm(const caffe::LayerParameter &layer, @@ -658,14 +661,13 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer, // create bias argument from mean: // multiply elements of mean by scaleFactor and get opposite numbers - // to subtract mean from input via biasAdd operation + // to subtract mean from input via add operation auto mean_weights = convertBlob(layer.blobs(0)); Tensor bias_data(mean_weights); for (Index idx : ShapeRange(bias_data.getShape())) bias_data.at(idx) *= -scale_factor; auto mean = createOp("", mean_weights)->getOutput(0); - auto result = - createOp(layer.name() + ".bias", convertCaffeToMIR(inputs[0]), mean); + auto result = createAdd(layer.name() + ".bias", convertCaffeToMIR(inputs[0]), mean); // create scale argument from variance: // multiply elements of variance by scaleFactor and @@ -675,8 +677,8 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter &layer, for (Index idx : ShapeRange(scale_data.getShape())) scale_data.at(idx) = 1.0f / std::sqrt(scale_data.at(idx) * scale_factor + eps); auto variance = createOp("", variance_weights)->getOutput(0); - result = createOp(layer.name() + ".scale", result->getOutput(0), variance); - return {convertMIRToCaffe(result->getOutput(0))}; + result = createMul(layer.name() + ".scale", result, variance); + return {convertMIRToCaffe(result)}; } std::vector @@ -703,16 +705,17 @@ CaffeOpCreator::convertEmbed(const caffe::LayerParameter &layer, { const auto &params = layer.embed_param(); auto data = createOp(layer.name() + ".weights", convertBlob(layer.blobs(0))); - auto result = createOp(layer.name(), data->getOutput(0), inputs[0], 0); + auto 
result = + createOp(layer.name(), data->getOutput(0), inputs[0], 0)->getOutput(0); // Add the bias, if any. if (params.bias_term()) { auto bias = createOp("", convertBlob(layer.blobs(1)))->getOutput(0); - result = createOp(layer.name() + ".bias", result->getOutput(0), bias); + result = createAdd(layer.name() + ".bias", result, bias); } - return {result->getOutput(0)}; + return {result}; } std::vector @@ -894,7 +897,7 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer, auto h_t = createOp("", zero_tensor)->getOutput(0); auto x_xw = createFullyConnected(x, xw, 2); - auto x_xw_b = createOp("", x_xw, xb)->getOutput(0); + auto x_xw_b = createAdd("", x_xw, xb); // Split input and continuation tensors into seq_length slices. std::vector x_xw_b_slices = createSplit(x_xw_b, seq_length, 0); @@ -903,12 +906,12 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer, for (int32_t t = 0; t < seq_length; t++) { - auto c_cont_t = createMul(c_t, cont_slices[t]); - auto h_cont_t = createMul(h_t, cont_slices[t]); + auto c_cont_t = createMul("", c_t, cont_slices[t]); + auto h_cont_t = createMul("", h_t, cont_slices[t]); auto x_xw_b_t = x_xw_b_slices[t]; auto h_hw_t = createFullyConnected(h_cont_t, hw, 2); - auto activation_inputs_concat = createAdd(x_xw_b_t, h_hw_t); + auto activation_inputs_concat = createAdd("", x_xw_b_t, h_hw_t); auto activation_inputs = createSplit(activation_inputs_concat, 4, 2); auto i_t = createOp("", activation_inputs[0])->getOutput(0); @@ -916,8 +919,8 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter &layer, auto o_t = createOp("", activation_inputs[2])->getOutput(0); auto g_t = createOp("", activation_inputs[3])->getOutput(0); - c_t = createAdd(createMul(c_cont_t, f_t), createMul(i_t, g_t)); - h_t = createMul(createOp("", c_t)->getOutput(0), o_t); + c_t = createAdd("", createMul("", c_cont_t, f_t), createMul("", i_t, g_t)); + h_t = createMul("", createOp("", c_t)->getOutput(0), o_t); h_slices[t] = h_t; } diff --git 
a/compiler/mir-caffe-importer/caffe_op_creator.h b/compiler/mir-caffe-importer/caffe_op_creator.h index b57d098..5152c4f 100644 --- a/compiler/mir-caffe-importer/caffe_op_creator.h +++ b/compiler/mir-caffe-importer/caffe_op_creator.h @@ -131,9 +131,11 @@ private: mir::Operation::Output *convertMIRToCaffe(mir::Operation::Output *arg); - mir::Operation::Output *createAdd(mir::Operation::Output *arg1, mir::Operation::Output *arg2); + mir::Operation::Output *createAdd(const std::string &name, mir::Operation::Output *arg1, + mir::Operation::Output *arg2); - mir::Operation::Output *createMul(mir::Operation::Output *arg1, mir::Operation::Output *arg2); + mir::Operation::Output *createMul(const std::string &name, mir::Operation::Output *arg1, + mir::Operation::Output *arg2); std::vector createSplit(mir::Operation::Output *arg, int32_t num_parts, int32_t axis); -- 2.7.4