From 05c7b00c2a4cd514360cffb5974cb9ef863f26d6 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov/AI Tools Lab/SRR/Engineer/Samsung Electronics
Date: Wed, 14 Aug 2019 18:22:24 +0900
Subject: [PATCH] [mir_onnx] Reimplement Gemm converter through FullyConnected
 (#6604)

`GemmOp` is almost the same thing as `FullyConnectedOp` and was only used in
the ONNX importer.

Signed-off-by: Sergei Barannikov
---
 compiler/mir-onnx-importer/ONNXHelpers.h |  2 +-
 compiler/mir-onnx-importer/Op/Gemm.cpp   | 79 ++++++++++++++------------------
 2 files changed, 35 insertions(+), 46 deletions(-)
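Note for reviewers (placed below the "---" line, so `git am` ignores it): the
new converter builds the ONNX Gemm formula Y = alpha * A' * B' + beta * C out
of TransposeOp, FullyConnectedOp (a plain matrix product), MulOp and AddOp
instead of the dedicated GemmOp. The standalone sketch below mirrors that
decomposition on plain float matrices as a sanity check; it uses no mir APIs,
and every name and value in it is made up for illustration.

// gemm_decomposition_sketch.cpp -- illustration only, not part of the patch.
// Build: g++ -std=c++11 gemm_decomposition_sketch.cpp
#include <cassert>
#include <cstddef>
#include <vector>

using Matrix = std::vector<std::vector<float>>;

// Plain matrix product -- the role FullyConnectedOp plays in the converter.
static Matrix matmul(const Matrix &a, const Matrix &b)
{
  const std::size_t m = a.size(), k = a[0].size(), n = b[0].size();
  assert(k == b.size());
  Matrix y(m, std::vector<float>(n, 0.0f));
  for (std::size_t i = 0; i < m; ++i)
    for (std::size_t j = 0; j < n; ++j)
      for (std::size_t p = 0; p < k; ++p)
        y[i][j] += a[i][p] * b[p][j];
  return y;
}

// Gemm expressed as the same composition the converter emits:
// FullyConnected(A, B), scale by alpha (MulOp), add beta * C (MulOp + AddOp).
// The transA/transB attributes map to explicit transposes and are omitted here.
static Matrix gemm(const Matrix &a, const Matrix &b, const Matrix &c, float alpha, float beta)
{
  Matrix y = matmul(a, b);
  for (std::size_t i = 0; i < y.size(); ++i)
    for (std::size_t j = 0; j < y[i].size(); ++j)
      y[i][j] = alpha * y[i][j] + beta * c[i][j];
  return y;
}

int main()
{
  const Matrix a = {{1, 2}, {3, 4}};
  const Matrix b = {{5, 6}, {7, 8}};
  const Matrix c = {{1, 1}, {1, 1}};
  // A * B = {{19, 22}, {43, 50}}, so 0.5 * A * B + 2 * C = {{11.5, 13}, {23.5, 27}}.
  const Matrix y = gemm(a, b, c, 0.5f, 2.0f);
  assert(y[0][0] == 11.5f && y[0][1] == 13.0f && y[1][0] == 23.5f && y[1][1] == 27.0f);
  return 0;
}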
diff --git a/compiler/mir-onnx-importer/ONNXHelpers.h b/compiler/mir-onnx-importer/ONNXHelpers.h
index 945bc24..c6d0441 100644
--- a/compiler/mir-onnx-importer/ONNXHelpers.h
+++ b/compiler/mir-onnx-importer/ONNXHelpers.h
@@ -77,7 +77,7 @@ inline float getFloatAttribute(const onnx::NodeProto &onnx_node, const std::stri
 }
 
 // Create vector tensor filled with the given value
-// TODO: it should be template
+// TODO Remove.
 inline mir::TensorVariant createScalarTensor(float value, const mir::Shape &shape)
 {
   std::vector<float> values(static_cast<std::size_t>(shape.numElements()), value);
diff --git a/compiler/mir-onnx-importer/Op/Gemm.cpp b/compiler/mir-onnx-importer/Op/Gemm.cpp
index 5f4085a..a2d48df 100644
--- a/compiler/mir-onnx-importer/Op/Gemm.cpp
+++ b/compiler/mir-onnx-importer/Op/Gemm.cpp
@@ -20,8 +20,9 @@
 
 #include "mir/TensorVariant.h"
 
+#include "mir/ops/AddOp.h"
 #include "mir/ops/ConstantOp.h"
-#include "mir/ops/GemmOp.h"
+#include "mir/ops/FullyConnectedOp.h"
 #include "mir/ops/MulOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/TransposeOp.h"
@@ -34,60 +35,48 @@ GemmNodeConverter::convert(const onnx::NodeProto &onnx_node,
                            const std::vector<mir::Operation::Output *> &inputs,
                            mir::Graph *graph) const
 {
-  // Compute Y = alpha * A' * B' + beta * C, where input tensor A has shape (M, K) or (K, M),
-  // input tensor B has shape (K, N) or (N, K),
-  // input tensor C is broadcastable to shape (M, N), and output tensor Y has shape (M, N).
-  // A will be transposed before doing the computation if attribute transA is non-zero,
-  // same for B and transB. This operator supports unidirectional broadcasting
-  // (tensor C should be unidirectional broadcastable to tensor A * B).
+  assert(inputs.size() == 3);
+  auto a = inputs[0];
+  auto b = inputs[1];
+  auto c = inputs[2];
 
-  // 0 means that no transpose is needed. It is the default value
-  bool trans_a = getIntAttribute(onnx_node, "transA", 0);
-  bool trans_b = getIntAttribute(onnx_node, "transB", 0);
+  // 1.0f is the default factor.
+  const float alpha_val = getFloatAttribute(onnx_node, "alpha", 1.0f);
+  const float beta_val = getFloatAttribute(onnx_node, "beta", 1.0f);
 
-  // 1.0f is the default factor
-  float alpha_val = getFloatAttribute(onnx_node, "alpha", 1.0f);
-  float beta_val = getFloatAttribute(onnx_node, "beta", 1.0f);
+  // 0 means that no transpose is needed. It is the default value.
+  const bool trans_a = getIntAttribute(onnx_node, "transA", 0);
+  const bool trans_b = getIntAttribute(onnx_node, "transB", 0);
 
-  // 1. Prepare input matrix A
-  // Flatten the shape by dim(0)
-  const auto &in_shape = inputs[0]->getShape();
-  mir::Shape shape0{in_shape.dim(0), in_shape.numElements() / in_shape.dim(0)};
-  auto input_a = createOp<mir::ops::ReshapeOp>(graph, inputs[0], shape0)->getOutput(0);
+  // Transpose the A and B matrices as needed.
   if (trans_a)
-    input_a = createOp<mir::ops::TransposeOp>(graph, input_a, std::vector<std::size_t>{1, 0})
-                  ->getOutput(0);
-  if (alpha_val != 1.0)
+    a = createOp<mir::ops::TransposeOp>(graph, a, std::vector<std::size_t>{1, 0})->getOutput(0);
+  if (trans_b)
+    b = createOp<mir::ops::TransposeOp>(graph, b, std::vector<std::size_t>{1, 0})->getOutput(0);
+
+  // Calculate A * B.
+  auto ab = createOp<mir::ops::FullyConnectedOp>(graph, a, b)->getOutput(0);
+
+  // Multiply A * B by the constant factor.
+  if (alpha_val != 1.0f)
   {
-    auto alpha_tensor = createScalarTensor(alpha_val, input_a->getShape());
+    mir::TensorVariant alpha_tensor(mir::DataType::FLOAT32, {1}, &alpha_val);
     auto alpha = createOp<mir::ops::ConstantOp>(graph, alpha_tensor)->getOutput(0);
-    input_a = createOp<mir::ops::MulOp>(graph, input_a, alpha)->getOutput(0);
+    ab = createOp<mir::ops::MulOp>(graph, alpha, ab)->getOutput(0);
   }
 
-  // 2. Prepare input matrix B
-  //
-  auto input_b = inputs[1];
-  if (trans_b)
-    input_b = createOp<mir::ops::TransposeOp>(graph, input_b, std::vector<std::size_t>{1, 0})
-                  ->getOutput(0);
-  // Number of cols in tensor A must be equal to number of rows in tensor B
-  assert(input_a->getShape().dim(1) == input_b->getShape().dim(0));
-  mir::Shape mult_a_b{input_a->getShape().dim(0), input_b->getShape().dim(1)};
-
-  // 3. Prepare input matrix C
-  //
-  auto input_c = inputs[2];
-  auto beta_tensor = createScalarTensor(beta_val, input_c->getShape());
-  if ((mult_a_b.rank() == 2) && (input_c->getShape().rank() == 1))
+  // Multiply C by the constant factor.
+  if (beta_val != 1.0f)
   {
-    beta_tensor = mir::TensorVariant(beta_tensor, mult_a_b);
+    mir::TensorVariant beta_tensor(mir::DataType::FLOAT32, {1}, &beta_val);
+    auto beta = createOp<mir::ops::ConstantOp>(graph, beta_tensor)->getOutput(0);
+    c = createOp<mir::ops::MulOp>(graph, beta, c)->getOutput(0);
   }
-  auto beta = createOp<mir::ops::ConstantOp>(graph, beta_tensor)->getOutput(0);
-  std::vector<mir::Operation::Output *> mul_inputs = {beta, input_c};
-  auto c_mult = createOp<mir::ops::MulOp>(graph, beta, input_c)->getOutput(0);
-  assert(c_mult->getShape() == mult_a_b);
-  auto result = createOp<mir::ops::GemmOp>(graph, input_a, input_b, c_mult);
-  return {result->getOutput(0)};
+
+  // Calculate the result: alpha * A * B + beta * C.
+  auto result = createOp<mir::ops::AddOp>(graph, ab, c)->getOutput(0);
+
+  return {result};
 }
 
 } // namespace mir_onnx
-- 
2.7.4