From 05c7b00c2a4cd514360cffb5974cb9ef863f26d6 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov/AI Tools Lab/SRR/Engineer/Samsung Electronics
Date: Wed, 14 Aug 2019 18:22:24 +0900
Subject: [PATCH] [mir_onnx] Reimplement Gemm converter through FullyConnected
 (#6604)

`GemmOp` is almost the same thing as `FullyConnectedOp` and was only used in
the ONNX importer.

Signed-off-by: Sergei Barannikov
---
 compiler/mir-onnx-importer/ONNXHelpers.h |  2 +-
 compiler/mir-onnx-importer/Op/Gemm.cpp   | 79 ++++++++++++++------------------
 2 files changed, 35 insertions(+), 46 deletions(-)
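Note for reviewers (placed below the "---" line, so `git am` ignores it): the
new converter builds the ONNX Gemm formula Y = alpha * A' * B' + beta * C out
of TransposeOp, FullyConnectedOp (a plain matrix product), MulOp and AddOp
instead of the dedicated GemmOp. The standalone sketch below mirrors that
decomposition on plain float matrices as a sanity check; it uses no mir APIs,
and every name and value in it is made up for illustration.

// gemm_decomposition_sketch.cpp -- illustration only, not part of the patch.
// Build: g++ -std=c++11 gemm_decomposition_sketch.cpp
#include <cassert>
#include <cstddef>
#include <vector>

using Matrix = std::vector<std::vector<float>>;

// Plain matrix product -- the role FullyConnectedOp plays in the converter.
static Matrix matmul(const Matrix &a, const Matrix &b)
{
  const std::size_t m = a.size(), k = a[0].size(), n = b[0].size();
  assert(k == b.size());
  Matrix y(m, std::vector<float>(n, 0.0f));
  for (std::size_t i = 0; i < m; ++i)
    for (std::size_t j = 0; j < n; ++j)
      for (std::size_t p = 0; p < k; ++p)
        y[i][j] += a[i][p] * b[p][j];
  return y;
}

// Gemm expressed as the same composition the converter emits:
// FullyConnected(A, B), scale by alpha (MulOp), add beta * C (MulOp + AddOp).
// The transA/transB attributes map to explicit transposes and are omitted here.
static Matrix gemm(const Matrix &a, const Matrix &b, const Matrix &c, float alpha, float beta)
{
  Matrix y = matmul(a, b);
  for (std::size_t i = 0; i < y.size(); ++i)
    for (std::size_t j = 0; j < y[i].size(); ++j)
      y[i][j] = alpha * y[i][j] + beta * c[i][j];
  return y;
}

int main()
{
  const Matrix a = {{1, 2}, {3, 4}};
  const Matrix b = {{5, 6}, {7, 8}};
  const Matrix c = {{1, 1}, {1, 1}};
  // A * B = {{19, 22}, {43, 50}}, so 0.5 * A * B + 2 * C = {{11.5, 13}, {23.5, 27}}.
  const Matrix y = gemm(a, b, c, 0.5f, 2.0f);
  assert(y[0][0] == 11.5f && y[0][1] == 13.0f && y[1][0] == 23.5f && y[1][1] == 27.0f);
  return 0;
}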
diff --git a/compiler/mir-onnx-importer/ONNXHelpers.h b/compiler/mir-onnx-importer/ONNXHelpers.h
index 945bc24..c6d0441 100644
--- a/compiler/mir-onnx-importer/ONNXHelpers.h
+++ b/compiler/mir-onnx-importer/ONNXHelpers.h
@@ -77,7 +77,7 @@ inline float getFloatAttribute(const onnx::NodeProto &onnx_node, const std::stri
 }
 
 // Create vector tensor filled with the given value
-// TODO: it should be template
+// TODO Remove.
 inline mir::TensorVariant createScalarTensor(float value, const mir::Shape &shape)
 {
   std::vector<float> values(static_cast<std::size_t>(shape.numElements()), value);
diff --git a/compiler/mir-onnx-importer/Op/Gemm.cpp b/compiler/mir-onnx-importer/Op/Gemm.cpp
index 5f4085a..a2d48df 100644
--- a/compiler/mir-onnx-importer/Op/Gemm.cpp
+++ b/compiler/mir-onnx-importer/Op/Gemm.cpp
@@ -20,8 +20,9 @@
 
 #include "mir/TensorVariant.h"
 
+#include "mir/ops/AddOp.h"
 #include "mir/ops/ConstantOp.h"
-#include "mir/ops/GemmOp.h"
+#include "mir/ops/FullyConnectedOp.h"
 #include "mir/ops/MulOp.h"
 #include "mir/ops/ReshapeOp.h"
 #include "mir/ops/TransposeOp.h"
@@ -34,60 +35,48 @@ GemmNodeConverter::convert(const onnx::NodeProto &onnx_node,
                            const std::vector<mir::Operation::Output *> &inputs,
                            mir::Graph *graph) const
 {
-  // Compute Y = alpha * A' * B' + beta * C, where input tensor A has shape (M, K) or (K, M),
-  // input tensor B has shape (K, N) or (N, K),
-  // input tensor C is broadcastable to shape (M, N), and output tensor Y has shape (M, N).
-  // A will be transposed before doing the computation if attribute transA is non-zero,
-  // same for B and transB. This operator supports unidirectional broadcasting
-  // (tensor C should be unidirectional broadcastable to tensor A * B).
+  assert(inputs.size() == 3);
+  auto a = inputs[0];
+  auto b = inputs[1];
+  auto c = inputs[2];
 
-  // 0 means that no transpose is needed. It is the default value
-  bool trans_a = getIntAttribute(onnx_node, "transA", 0);
-  bool trans_b = getIntAttribute(onnx_node, "transB", 0);
+  // 1.0f is the default factor.
+  const float alpha_val = getFloatAttribute(onnx_node, "alpha", 1.0f);
+  const float beta_val = getFloatAttribute(onnx_node, "beta", 1.0f);
 
-  // 1.0f is the default factor
-  float alpha_val = getFloatAttribute(onnx_node, "alpha", 1.0f);
-  float beta_val = getFloatAttribute(onnx_node, "beta", 1.0f);
+  // 0 means that no transpose is needed. It is the default value.
+  const bool trans_a = getIntAttribute(onnx_node, "transA", 0);
+  const bool trans_b = getIntAttribute(onnx_node, "transB", 0);
 
-  // 1. Prepare input matrix A
-  // Flatten the shape by dim(0)
-  const auto &in_shape = inputs[0]->getShape();
-  mir::Shape shape0{in_shape.dim(0), in_shape.numElements() / in_shape.dim(0)};
-  auto input_a = createOp<mir::ops::ReshapeOp>(graph, inputs[0], shape0)->getOutput(0);
+  // Transpose the A and B matrices as needed.
   if (trans_a)
-    input_a = createOp<mir::ops::TransposeOp>(graph, input_a, std::vector<std::size_t>{1, 0})
-                  ->getOutput(0);
-  if (alpha_val != 1.0)
+    a = createOp<mir::ops::TransposeOp>(graph, a, std::vector<std::size_t>{1, 0})->getOutput(0);
+  if (trans_b)
+    b = createOp<mir::ops::TransposeOp>(graph, b, std::vector<std::size_t>{1, 0})->getOutput(0);
+
+  // Calculate A * B.
+  auto ab = createOp<mir::ops::FullyConnectedOp>(graph, a, b)->getOutput(0);
+
+  // Multiply A * B by the constant factor.
+  if (alpha_val != 1.0f)
   {
-    auto alpha_tensor = createScalarTensor(alpha_val, input_a->getShape());
+    mir::TensorVariant alpha_tensor(mir::DataType::FLOAT32, {1}, &alpha_val);
     auto alpha = createOp<mir::ops::ConstantOp>(graph, alpha_tensor)->getOutput(0);
-    input_a = createOp<mir::ops::MulOp>(graph, input_a, alpha)->getOutput(0);
+    ab = createOp<mir::ops::MulOp>(graph, alpha, ab)->getOutput(0);
   }
 
-  // 2. Prepare input matrix B
-  //
-  auto input_b = inputs[1];
-  if (trans_b)
-    input_b = createOp<mir::ops::TransposeOp>(graph, input_b, std::vector<std::size_t>{1, 0})
-                  ->getOutput(0);
-  // Number of cols in tensor A must be equal to number of rows in tensor B
-  assert(input_a->getShape().dim(1) == input_b->getShape().dim(0));
-  mir::Shape mult_a_b{input_a->getShape().dim(0), input_b->getShape().dim(1)};
-
-  // 3. Prepare input matrix C
-  //
-  auto input_c = inputs[2];
-  auto beta_tensor = createScalarTensor(beta_val, input_c->getShape());
-  if ((mult_a_b.rank() == 2) && (input_c->getShape().rank() == 1))
+  // Multiply C by the constant factor.
+  if (beta_val != 1.0f)
   {
-    beta_tensor = mir::TensorVariant(beta_tensor, mult_a_b);
+    mir::TensorVariant beta_tensor(mir::DataType::FLOAT32, {1}, &beta_val);
+    auto beta = createOp<mir::ops::ConstantOp>(graph, beta_tensor)->getOutput(0);
+    c = createOp<mir::ops::MulOp>(graph, beta, c)->getOutput(0);
   }
-  auto beta = createOp<mir::ops::ConstantOp>(graph, beta_tensor)->getOutput(0);
-  std::vector<mir::Operation::Output *> mul_inputs = {beta, input_c};
-  auto c_mult = createOp<mir::ops::MulOp>(graph, beta, input_c)->getOutput(0);
-  assert(c_mult->getShape() == mult_a_b);
-  auto result = createOp<mir::ops::GemmOp>(graph, input_a, input_b, c_mult);
-  return {result->getOutput(0)};
+
+  // Calculate the result: alpha * A * B + beta * C.
+  auto result = createOp<mir::ops::AddOp>(graph, ab, c)->getOutput(0);
+
+  return {result};
 }
 
 } // namespace mir_onnx
-- 
2.7.4