From 49250a517b766d1d302f14fa165e0ddd86c5acb1 Mon Sep 17 00:00:00 2001
From: Andrew V. Tischenko/AI Tools Lab/SRR/Staff Engineer/Samsung Electronics
Date: Thu, 13 Dec 2018 12:50:49 +0300
Subject: [PATCH] [nnc] New Gemm implementation, GlobalAveragePool and Conv2D updates. (#2622)

The first steps of the new Gemm implementation are done: Gemm is now a standard
NN operator with proper shape inference. Interpreter (and possibly soft backend)
support will be added in the next patch.
The Pool and Conv operators use similar input data (kernel, strides and padding
attributes), which is why their handling was partially merged; as a result they
now produce the proper output shape.

Signed-off-by: Andrew V. Tischenko <a.tischenko@partner.samsung.com>
---
 contrib/nnc/core/CMakeLists.txt | 1 +
 contrib/nnc/core/modelIR/IrDotDumper.cpp | 8 ++
 contrib/nnc/core/modelIR/Operation.cpp | 1 +
 contrib/nnc/core/modelIR/operations/GemmOp.cpp | 74 ++++++++++++
 contrib/nnc/include/core/modelIR/IrDotDumper.h | 2 +-
 .../include/core/modelIR/operations/ConstantOp.h | 9 +-
 .../nnc/include/core/modelIR/operations/GemmOp.h | 56 +++++++++
 .../core/modelIR/operations/operations.lst.h | 1 +
 .../nnc/include/passes/interpreter/Interpreter.h | 1 +
 .../passes/acl_soft_backend/AclCppOpGenerator.cpp | 5 +
 .../passes/acl_soft_backend/AclCppOpGenerator.h | 1 +
 contrib/nnc/passes/interpreter/Interpreter.cpp | 11 +-
 contrib/nnc/passes/interpreter/ops/Gemm.cpp | 19 +++
 contrib/nnc/passes/interpreter/ops/Gemm.h | 44 +++++++
 .../nnc/passes/onnx_frontend/ONNXImporterImpl.cpp | 61 ++++++++--
 .../nnc/passes/onnx_frontend/ONNXImporterImpl.h | 1 +
 contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp | 128 ++++++++++++++-------
 contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp | 5 +
 contrib/nnc/passes/soft_backend/ModelAnalyzer.h | 1 +
 contrib/nnc/passes/soft_backend/SBSerializer.cpp | 7 +-
 contrib/nnc/passes/soft_backend/SBSerializer.h | 1 +
 contrib/nnc/tests/interpreter/graph_creator.cpp | 1 +
 22 files changed, 378 insertions(+), 60 deletions(-)
 create mode 100644 contrib/nnc/core/modelIR/operations/GemmOp.cpp
 create mode 100644 contrib/nnc/include/core/modelIR/operations/GemmOp.h
 create mode 100644 contrib/nnc/passes/interpreter/ops/Gemm.cpp
 create mode 100644 contrib/nnc/passes/interpreter/ops/Gemm.h

diff --git a/contrib/nnc/core/CMakeLists.txt b/contrib/nnc/core/CMakeLists.txt
index 52aba7f..f2713f8 100644
--- a/contrib/nnc/core/CMakeLists.txt
+++ b/contrib/nnc/core/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SOURCES "modelIR/operations/ConcatOp.cpp"
 "modelIR/operations/DepthwiseConv2DOp.cpp"
 "modelIR/operations/FullyConnectedOp.cpp"
 "modelIR/operations/GatherOp.cpp"
+ "modelIR/operations/GemmOp.cpp"
 "modelIR/operations/PadOp.cpp"
 "modelIR/operations/PoolOp.cpp"
 "modelIR/operations/SqueezeOp.cpp"
diff --git a/contrib/nnc/core/modelIR/IrDotDumper.cpp b/contrib/nnc/core/modelIR/IrDotDumper.cpp
index 84d261e..23731df 100644
--- a/contrib/nnc/core/modelIR/IrDotDumper.cpp
+++ b/contrib/nnc/core/modelIR/IrDotDumper.cpp
@@ -30,6 +30,7 @@
 #include "core/modelIR/operations/EluOp.h"
 #include "core/modelIR/operations/FullyConnectedOp.h"
 #include "core/modelIR/operations/GatherOp.h"
+#include "core/modelIR/operations/GemmOp.h"
 #include "core/modelIR/operations/PadOp.h"
 #include "core/modelIR/operations/PoolOp.h"
 #include "core/modelIR/operations/ReduceFOp.h"
@@ -125,6 +126,13 @@ void IrDotDumper::visit(ops::FullyConnectedOp& op) {
   dotBuilder.updateWithOp(&op, nodeInfo);
 }
+void
IrDotDumper::visit(ops::GemmOp& op) { + auto nodeInfo = DotIrNodeInfo().withType("Gemm", op.getName()) + .withInShapes(getInputShapes(op)) + .withOutShapes(getOutputShapes(op)); + dotBuilder.updateWithOp(&op, nodeInfo); +} + void IrDotDumper::visit(ops::SoftmaxOp& op) { auto nodeInfo = DotIrNodeInfo().withType("Softmax", op.getName()) .withInShapes(getInputShapes(op)) diff --git a/contrib/nnc/core/modelIR/Operation.cpp b/contrib/nnc/core/modelIR/Operation.cpp index 2005ea6..97765ae 100644 --- a/contrib/nnc/core/modelIR/Operation.cpp +++ b/contrib/nnc/core/modelIR/Operation.cpp @@ -28,6 +28,7 @@ #include "core/modelIR/operations/EluOp.h" #include "core/modelIR/operations/FullyConnectedOp.h" #include "core/modelIR/operations/GatherOp.h" +#include "core/modelIR/operations/GemmOp.h" #include "core/modelIR/operations/PadOp.h" #include "core/modelIR/operations/PoolOp.h" #include "core/modelIR/operations/ReduceFOp.h" diff --git a/contrib/nnc/core/modelIR/operations/GemmOp.cpp b/contrib/nnc/core/modelIR/operations/GemmOp.cpp new file mode 100644 index 0000000..d0dd275 --- /dev/null +++ b/contrib/nnc/core/modelIR/operations/GemmOp.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/modelIR/operations/GemmOp.h" + +namespace nnc { +namespace mir { +namespace ops { + +void GemmOp::inferOutputShapes() { +//Input tensor A: The shape of A should be (M, K) if transA is 0, or (K, M) if transA is non-zero. +//Input tensor B: The shape of B should be (K, N) if transB is 0, or (N, K) if transB is non-zero. +//Input tensor C: The shape of C should be unidirectional broadcastable to (M, N). +//Output tensor Y: shape (M, N) or vector(N) if M == 1. 
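+// Illustrative example of the rule above (not from the original patch): with
+// transA == 0 and transB == 0, A of shape (2, 3) and B of shape (3, 4) give
+// Y of shape (2, 4); C must then be unidirectionally broadcastable to (2, 4),
+// e.g. (4) or (2, 4). With transA != 0 the same product expects A as (3, 2).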
+  std::vector<int32_t> vector(2); // will be used to create the shapes of tensors A and B
+
+  // Flatten the shape by dim(0)
+  mir::Shape shape0 = {getInputShape(0).dim(0),
+                       getInputShape(0).numElements() / getInputShape(0).dim(0)};
+  assert(shape0.rank() == 2);
+  Shape shape_a(2);
+  if (_transA) {
+    shape_a.dim(0) = shape0.dim(shape0.rank() - 1);
+    shape_a.dim(1) = shape0.dim(shape0.rank() - 2);
+  } else {
+    shape_a.dim(0) = shape0.dim(shape0.rank() - 2);
+    shape_a.dim(1) = shape0.dim(shape0.rank() - 1);
+  }
+
+  auto& shape1 = getInputShape(1);
+  // It must be a matrix
+  assert(shape1.rank() == 2);
+  Shape shape_b(2);
+
+  if (_transB) {
+    shape_b.dim(0) = shape1.dim(shape1.rank() - 1);
+    shape_b.dim(1) = shape1.dim(shape1.rank() - 2);
+  } else {
+    shape_b.dim(0) = shape1.dim(shape1.rank() - 2);
+    shape_b.dim(1) = shape1.dim(shape1.rank() - 1);
+  }
+
+  // The number of columns in tensor A must equal the number of rows in tensor B
+  assert(shape_a.dim(1) == shape_b.dim(0));
+  Shape mult_a_b({shape_a.dim(0), shape_b.dim(1)});
+
+  Shape shape_c = getInputShape(2);
+
+  if (shape_c.rank() == 1) {
+    assert(mult_a_b.dim(0) == 1);
+    assert(mult_a_b.dim(1) == shape_c.dim(0));
+  } else {
+    assert(shape_c.rank() == 2);
+    assert((mult_a_b.dim(0) == shape_c.dim(0)) && (mult_a_b.dim(1) == shape_c.dim(1)));
+  }
+  setOutputShape(0, mult_a_b);
+}
+
+} // namespace ops
+} // namespace mir
+} // namespace nnc
diff --git a/contrib/nnc/include/core/modelIR/IrDotDumper.h b/contrib/nnc/include/core/modelIR/IrDotDumper.h
index f5a907a..dad796c 100644
--- a/contrib/nnc/include/core/modelIR/IrDotDumper.h
+++ b/contrib/nnc/include/core/modelIR/IrDotDumper.h
@@ -18,7 +18,6 @@
 #define _NNC_BACKEND_INTERPRETER_CORE_DOTDUMPER_
 #include "core/modelIR/Visitor.h"
-
 #include "core/modelIR/ir_dot_builder.h"
 namespace nnc
@@ -45,6 +44,7 @@ public:
   void visit(ops::EluOp& op) override;
   void visit(ops::FullyConnectedOp& op) override;
   void visit(ops::GatherOp& op) override;
+  void visit(ops::GemmOp& op) override;
   void visit(ops::PadOp& op) override;
   void visit(ops::PoolOp& op) override;
   void visit(ops::ReduceFOp& op) override;
diff --git a/contrib/nnc/include/core/modelIR/operations/ConstantOp.h b/contrib/nnc/include/core/modelIR/operations/ConstantOp.h
index 075ccad..92d22f4 100644
--- a/contrib/nnc/include/core/modelIR/operations/ConstantOp.h
+++ b/contrib/nnc/include/core/modelIR/operations/ConstantOp.h
@@ -25,14 +25,15 @@ namespace ops {
 class ConstantOp : public Operation {
 public:
-  ConstantOp(const TensorVariant& value) : Operation(Type::constant, {}), _value(value) {
-    setOutputShape(0, _value.getShape());
+  ConstantOp(const std::shared_ptr<TensorVariant>& value) :
+      Operation(Type::constant, {}), _value(value) {
+    setOutputShape(0, _value->getShape());
   }
-  const TensorVariant& getValue() const { return _value; }
+  const std::shared_ptr<TensorVariant>& getValue() const { return _value; }
 private:
-  TensorVariant _value;
+  const std::shared_ptr<TensorVariant> _value;
 }
diff --git a/contrib/nnc/include/core/modelIR/operations/GemmOp.h b/contrib/nnc/include/core/modelIR/operations/GemmOp.h
new file mode 100644
index 0000000..4c10af9
--- /dev/null
+++ b/contrib/nnc/include/core/modelIR/operations/GemmOp.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _NNC_CORE_IR_MODEL_GEMM_OP_H_ +#define _NNC_CORE_IR_MODEL_GEMM_OP_H_ + +#include "core/modelIR/Operation.h" +#include "core/modelIR/TensorVariant.h" + +namespace nnc { +namespace mir { +namespace ops { + +class GemmOp : public Operation { +public: + GemmOp(const IODescriptor a, const IODescriptor b, const IODescriptor c, + bool transA, bool transB, float alpha, float beta) : + Operation(Type::gemmOp, {a, b, c}), + _a(a), _b(b), _c(c), _transA(transA),_transB(transB), _alpha(alpha), _beta(beta) { + inferOutputShapes(); + } + + bool getTransA() {return _transA;} + bool getTransB() {return _transB;} + float getAlpha() {return _alpha;} + float getBeta() {return _beta;} + +private: + void inferOutputShapes(); + + const IODescriptor _a; + const IODescriptor _b; + const IODescriptor _c; + bool _transA; + bool _transB; + float _alpha; + float _beta; +}; +} // namespace ops +} // namespace mir +} // namespace nnc + +#endif //_NNC_CORE_IR_MODEL_GEMM_OP_H_ diff --git a/contrib/nnc/include/core/modelIR/operations/operations.lst.h b/contrib/nnc/include/core/modelIR/operations/operations.lst.h index c4ae1d2..9292e71 100644 --- a/contrib/nnc/include/core/modelIR/operations/operations.lst.h +++ b/contrib/nnc/include/core/modelIR/operations/operations.lst.h @@ -25,6 +25,7 @@ HANDLE_OP(gather, GatherOp) HANDLE_OP(softmax, SoftmaxOp) HANDLE_OP(pool, PoolOp) HANDLE_OP(fullyConnected, FullyConnectedOp) +HANDLE_OP(gemmOp, GemmOp) HANDLE_OP(cappedReLU, CappedReluOp) HANDLE_OP(biasAdd, BiasAddOp) HANDLE_OP(variable, VariableOp) diff --git a/contrib/nnc/include/passes/interpreter/Interpreter.h b/contrib/nnc/include/passes/interpreter/Interpreter.h index f1ddba3..25239c2 100644 --- a/contrib/nnc/include/passes/interpreter/Interpreter.h +++ b/contrib/nnc/include/passes/interpreter/Interpreter.h @@ -49,6 +49,7 @@ public: void visit(ops::EluOp& op) override; void visit(ops::FullyConnectedOp& op) override; void visit(ops::GatherOp& op) override; + void visit(ops::GemmOp& op) override; void visit(ops::PadOp& op) override; void visit(ops::PoolOp& op) override; void visit(ops::ReduceFOp& op) override; diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index 404fa3b..46d5c6f 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -17,6 +17,7 @@ #include "core/modelIR/operations/DropoutOp.h" #include "core/modelIR/operations/ElementwiseOp.h" #include "core/modelIR/operations/FullyConnectedOp.h" +#include "core/modelIR/operations/GemmOp.h" #include "core/modelIR/operations/PoolOp.h" #include "core/modelIR/operations/ReduceFOp.h" #include "core/modelIR/operations/ReluOp.h" @@ -290,6 +291,10 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) { runLayer(layer); } +void AclCppOpGenerator::visit(ops::GemmOp& op) { + assert(false); +} + void AclCppOpGenerator::visit(ops::CappedReluOp& op) { genActivation(op, "LU_BOUNDED_RELU", op.getCap()); } diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h 
b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h index b6108fc..de10424 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h @@ -60,6 +60,7 @@ public: void visit(mir::ops::EluOp& op) override; void visit(mir::ops::FullyConnectedOp& op) override; void visit(mir::ops::GatherOp& op) override; + void visit(mir::ops::GemmOp& op) override; void visit(mir::ops::PadOp& op) override; void visit(mir::ops::PoolOp& op) override; void visit(mir::ops::ReduceFOp& op) override; diff --git a/contrib/nnc/passes/interpreter/Interpreter.cpp b/contrib/nnc/passes/interpreter/Interpreter.cpp index 76cd84a..a8cf131 100644 --- a/contrib/nnc/passes/interpreter/Interpreter.cpp +++ b/contrib/nnc/passes/interpreter/Interpreter.cpp @@ -22,6 +22,7 @@ #include "passes/interpreter/Interpreter.h" #include "core/modelIR/operations/FullyConnectedOp.h" +#include "core/modelIR/operations/GemmOp.h" #include "core/modelIR/operations/SoftmaxOp.h" #include "core/modelIR/operations/CappedReluOp.h" #include "core/modelIR/operations/DepthwiseConv2DOp.h" @@ -51,6 +52,7 @@ #include "ops/DeConv2D.h" #include "ops/Depthwise_conv_2D.h" #include "ops/FullyConnected.h" +#include "ops/Gemm.h" #include "ops/Pool.h" #include "ops/Reshape.h" #include "ops/Softmax.h" @@ -80,7 +82,7 @@ void NNInterpreter::visit(ops::VariableOp& op) { } void NNInterpreter::visit(ops::ConstantOp& op) { - var(op.getId()) = {op.getValue()}; + var(op.getId()) = {*op.getValue()}; } std::vector &NNInterpreter::getResult(Operation* op) { @@ -156,6 +158,13 @@ void NNInterpreter::visit(ops::FullyConnectedOp& op) { var(op.getId()) = FullyConnected(input, op)(); } +void NNInterpreter::visit(ops::GemmOp& op) { + mapByName(&op); + auto operand = op.getPrevNodes()[0]; + TensorVariant input = var(operand.op->getId())[operand.index]; + var(op.getId()) = Gemm(input, op)(); +} + void NNInterpreter::visit(ops::CappedReluOp& op) { mapByName(&op); auto operand = op.getPrevNodes()[0]; diff --git a/contrib/nnc/passes/interpreter/ops/Gemm.cpp b/contrib/nnc/passes/interpreter/ops/Gemm.cpp new file mode 100644 index 0000000..4259656 --- /dev/null +++ b/contrib/nnc/passes/interpreter/ops/Gemm.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Gemm.h" +//Do not remove +//Used to force compile Gemm.h diff --git a/contrib/nnc/passes/interpreter/ops/Gemm.h b/contrib/nnc/passes/interpreter/ops/Gemm.h new file mode 100644 index 0000000..e130626 --- /dev/null +++ b/contrib/nnc/passes/interpreter/ops/Gemm.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _NNC_CORE_BACKEND_INTERPRETER_GEMM_ +#define _NNC_CORE_BACKEND_INTERPRETER_GEMM_ + +#include "core/modelIR/ShapeRange.h" +#include "core/modelIR/operations/GemmOp.h" +#include "OperationImpl.h" + +namespace nnc +{ +template +class Gemm : public OperationImpl +{ +public: + Gemm(const mir::TensorVariant &_input, const mir::ops::GemmOp &_op) : + _op(_op), _input(_input) {} + + std::vector operator()() override { + mir::TensorVariant res = OperationImpl::allocate_tensor(_op.getOutputShape(0)); + return {res}; + } + +private: + const mir::ops::GemmOp& _op; + const mir::Tensor _input; +}; +} // namespace nnc + +#endif //_NNC_CORE_BACKEND_INTERPRETER_GEMM_ diff --git a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp index a453471..98f68b8 100644 --- a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp +++ b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.cpp @@ -150,23 +150,17 @@ void ONNXImporterImpl::createGraphInputs() { mir::Shape input_shape = ShapeHelper::createShape(onnx_tensor->dims(), static_cast(onnx_tensor->dims_size())); _inputTensors[name] = createTensor(onnx_tensor, input_shape); - auto constant = _graph->create(name, *_inputTensors[name].get()); + auto constant = _graph->create(name, _inputTensors[name]); _tensorNameToPrevMirOp[name] = constant; constants.insert(constant); } else { // We're dealing with graph input auto onnx_input_shape = input.type().tensor_type().shape(); - std::vector shape_vector(onnx_input_shape.dim_size()); + mir::Shape shape(4); for (int i = 0; i < onnx_input_shape.dim_size(); i++) { assert(onnx_input_shape.dim(i).has_dim_value()); - shape_vector[i] = onnx_input_shape.dim(i).dim_value(); + shape.dim(i) = onnx_input_shape.dim(i).dim_value(); } - mir::Shape shape(shape_vector); - // TODO For now we only support convolutional networks. The input data have already been - // transformed from ONNX NCHW format to ModelIR NHWC; reflect the changes in the IR. - if (shape.rank() == 4) - shape = mir::Shape{shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(0)}; - // TODO: Temporary solution! auto node = _graph->create(name, shape); _tensorNameToPrevMirOp[name] = node; @@ -176,6 +170,54 @@ void ONNXImporterImpl::createGraphInputs() { _graph->setConstants(constants); } +static void dumpShape(const mir::Shape& shape) { + std::cout << "{"; + for (int i = 0; i < shape.rank(); i++) { + std::cout << shape.dim(i) << (i == shape.rank() - 1 ? 
"} " : ", "); + } +} + +void ONNXImporterImpl::dump(const std::vector& ops, const onnx::NodeProto& onnx_node) { + for (auto op : ops) { + std::cout << onnx_node.op_type() << " '" << op->getName() << "' Input Shapes: "; + for (int i = 0; i < op->getNumInputs() ; i++) { + dumpShape(op->getInputShape(i)); + } + std::cout << " Output Shapes: "; + for (int i = 0; i < op->getNumOutputs() ; i++) { + dumpShape(op->getOutputShape(i)); + } + auto* onnx_op_type = ONNXPerfectHash::getONNXOpType(onnx_node.op_type().c_str(), onnx_node.op_type().size()); + switch (onnx_op_type->opCode) { + case ONNXOpCode::opConv: { + auto *conv = dynamic_cast(op); + std::cout << " Weights tensor shape "; + dumpShape(conv->getKernel().getShape()); + std::cout << " Strides "; + dumpShape(conv->getStrides()); + std::cout << " Padding before: (" << conv->getPaddingBefore()[0] << " " << conv->getPaddingBefore()[1] << ")"; + std::cout << " After: (" << conv->getPaddingAfter()[0] << " " << conv->getPaddingAfter()[1] << ")"; + break; + } + case ONNXOpCode::opGlobalAveragePool: + case ONNXOpCode::opAveragePool: + case ONNXOpCode::opMaxPool: { + auto *pool = dynamic_cast(op); + std::cout << " Kernel "; + dumpShape(pool->getWindowShape()); + std::cout << " Strides "; + dumpShape(pool->getStrides()); + std::cout << " Padding before: " << pool->getPaddingBefore()[0] << " " << pool->getPaddingBefore()[1]; + std::cout << " After: " << pool->getPaddingAfter()[0] << " " << pool->getPaddingAfter()[1]; + break; + } + default: + break; + } + std::cout << "\n"; + } +} + mir::Graph *ONNXImporterImpl::createIR() { GOOGLE_PROTOBUF_VERIFY_VERSION; @@ -267,6 +309,7 @@ mir::Graph *ONNXImporterImpl::createIR() { assert (outputs.size()); // FIXME: it should be done properly via the given graph outputs _graphOutputs.assign(outputs.begin(), outputs.end()); + dump(outputs, onnx_node); } // set graph outputs // TODO: it should be done with onnx graph outputs diff --git a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h index 67667fb..24ec1c3 100644 --- a/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h +++ b/contrib/nnc/passes/onnx_frontend/ONNXImporterImpl.h @@ -39,6 +39,7 @@ public: void import() {}; mir::Graph *createIR() override; + void dump(const std::vector& op, const onnx::NodeProto& onnx_node); private: void createGraphInputs(); diff --git a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp index 6d6faf1..0e9b111 100644 --- a/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp +++ b/contrib/nnc/passes/onnx_frontend/ONNXOpCreator.cpp @@ -31,6 +31,7 @@ #include "core/modelIR/operations/DepthwiseConv2DOp.h" #include "core/modelIR/operations/DropoutOp.h" #include "core/modelIR/operations/FullyConnectedOp.h" +#include "core/modelIR/operations/GemmOp.h" #include "core/modelIR/operations/PoolOp.h" #include "core/modelIR/operations/ReluOp.h" #include "core/modelIR/operations/ReshapeOp.h" @@ -86,34 +87,67 @@ static const mir::TensorVariant* createTensor(float data) { std::shared_ptr shared_buffer (new char[buffer_size], std::default_delete()); memcpy(shared_buffer.get(), src_data, buffer_size); Shape tensor_shape = Shape({1}); + // FIXME: it has to be shared_ptr auto mir_tensor = new mir::TensorVariant(tensor_shape, shared_buffer, type, element_size); return mir_tensor; } +struct KernelStridesPadding { + Shape kernel_shape; + Shape strides_shape; + std::vector padding_before{0, 0}; + std::vector padding_after{0, 0}; +}; + +static void 
getKernelStridesPadding(const onnx::NodeProto &onnx_node, KernelStridesPadding &cdata) { + auto* kshape = findAttribute(onnx_node, "kernel_shape"); + assert(kshape && kshape->ints_size()); + auto* strides = findAttribute(onnx_node, "strides"); + assert(strides && strides->ints_size()); + auto* pads = findAttribute(onnx_node, "pads"); + + cdata.kernel_shape = ShapeHelper::createShape(kshape->ints(), kshape->ints_size()); + cdata.strides_shape = ShapeHelper::createShape(strides->ints(), strides->ints_size()); + + if (pads) { + assert(pads->ints_size() >= 2); + cdata.padding_before[0] = pads->ints(0); + cdata.padding_before[1] = pads->ints(1); + // TODO: ONNX padding could be for the beginning and ending along each axis that's why we + // should select the interesting ones. + if (pads->ints_size() == 4) { + cdata.padding_after[0] = pads->ints(2); + cdata.padding_after[1] = pads->ints(3); + } + } +}; + std::vector ONNXOpCreator::convertConv2D(InputOps& inputs, const onnx::NodeProto& onnx_node) { assert(inputs.size() >= 2); - auto* strides = findAttribute(onnx_node, "strides"); - assert(strides && strides->ints_size()); - Shape onnx_strides_shape = ShapeHelper::createShape(strides->ints(), strides->ints_size()); - // FIXME: it's a hack - Shape strides_shape = {onnx_strides_shape.dim(0), onnx_strides_shape.dim(1), 1}; + KernelStridesPadding cdata; + getKernelStridesPadding(onnx_node, cdata); + // FIXME: It can be non-constant value. auto* in_weights = dynamic_cast(inputs[1]); - assert(in_weights); + assert(in_weights && "Weights could be a constant tensor only"); auto in_weights_tensor = in_weights->getValue(); - // TODO: we don't support padding at the moment - std::vector padding_before{0, 0}; - std::vector padding_after{0, 0}; - Operation* input_bias; - if (inputs.size() > 2) - input_bias = inputs[2]; + // We should transpose ONNX MCHW to HWOI + auto transposed = transposeTensor<2, 3, 1, 0>(in_weights_tensor); + + mir::ops::ConstantOp* input_bias = nullptr; + if (inputs.size() > 2) { + input_bias = dynamic_cast(inputs[2]); + assert(input_bias && "1D optional bias could be a constant tensor only"); + } inputs.resize(1); std::vector outputs; - outputs = createOp(inputs[0]->getOutput(0), in_weights_tensor, strides_shape, - padding_before, padding_after); - // TODO: there could be bias tensor as inputs[2]. 
+ outputs = createOp(inputs[0]->getOutput(0), *transposed, cdata.strides_shape, + cdata.padding_before, cdata.padding_after); + if (input_bias) + outputs = createOp(outputs[0]->getOutput(0), *input_bias->getValue()); + return outputs; } @@ -136,8 +170,7 @@ std::vector ONNXOpCreator::convertPool(InputOps& inputs, ONNXOpCode ops::PoolOp::PoolingType pool_type; std::vector result; - std::vector padding_before{0, 0}; - std::vector padding_after{0, 0}; + KernelStridesPadding cdata; switch (op_code) { case ONNXOpCode::opGlobalAveragePool: @@ -147,7 +180,7 @@ std::vector ONNXOpCreator::convertPool(InputOps& inputs, ONNXOpCode ops::PoolOp::PoolingType::AVG, inputs[0]->getOutputShape(0), // kernel_shape Shape({1, 1}), // strides_shape - padding_before, padding_after,// no padding + cdata.padding_before, cdata.padding_after, ops::PoolOp::BorderType::ZEROFILLED, ops::PoolOp::RoundMode::floor); case ONNXOpCode::opAveragePool: @@ -162,29 +195,12 @@ std::vector ONNXOpCreator::convertPool(InputOps& inputs, ONNXOpCode assert(false); } // Proceed with Average or Max Pool - auto* kshape = findAttribute(onnx_node, "kernel_shape"); - assert(kshape && kshape->ints_size()); - auto* strides = findAttribute(onnx_node, "strides"); - assert(strides && strides->ints_size()); - auto* pads = findAttribute(onnx_node, "pads"); - - Shape kernel_shape = ShapeHelper::createShape(kshape->ints(), kshape->ints_size()); - Shape strides_shape = ShapeHelper::createShape(strides->ints(), strides->ints_size()); - - if (pads) { - assert(pads->ints_size() >= 2); - padding_before[0] = pads->ints(0); - padding_before[1] = pads->ints(1); - // TODO: ONNX padding could be for the beginning and ending along each axis that's why we - // should select the interesting ones. - if (pads->ints_size() == 4) { - padding_after[0] = pads->ints(2); - padding_after[1] = pads->ints(3); - } + getKernelStridesPadding(onnx_node, cdata); - } - result = createOp(inputs[0]->getOutput(0), pool_type, kernel_shape, strides_shape, - padding_before, padding_after, border_type, + result = createOp(inputs[0]->getOutput(0), pool_type, + cdata.kernel_shape, cdata.strides_shape, + cdata.padding_before, cdata.padding_after, + border_type, ops::PoolOp::RoundMode::floor); return result; } @@ -252,7 +268,7 @@ std::vector ONNXOpCreator::convertBatchNorm(InputOps& inputs, } std::vector ONNXOpCreator::convertDropout(InputOps& inputs, - const onnx::NodeProto& onnx_node) { + const onnx::NodeProto& onnx_node) { bool found; float value; std::tie(found, value) = getFloatAttribute(onnx_node, "ratio"); @@ -271,8 +287,32 @@ std::vector ONNXOpCreator::convertScale(InputOps& inputs, } std::vector ONNXOpCreator::convertGemm(InputOps& inputs, - const onnx::NodeProto& onnx_node) { - // TODO: NIY - return inputs; + const onnx::NodeProto& onnx_node) { + bool found; + int ivalue; + float fvalue; + + std::tie (found, ivalue) = getIntAttribute(onnx_node, "transA"); + bool transA = found ? ivalue : 0; + std::tie (found, ivalue) = getIntAttribute(onnx_node, "transB"); + bool transB = found ? ivalue : 0; + std::tie (found, fvalue) = getIntAttribute(onnx_node, "alpha"); + float alpha = found ? fvalue : 1.0; + std::tie (found, fvalue) = getIntAttribute(onnx_node, "beta"); + float beta = found ? 
fvalue : 1.0;
+
+  // Flatten the shape by dim(0)
+  mir::Shape shape0 ({inputs[0]->getOutputShape(0).dim(0),
+                      inputs[0]->getOutputShape(0).numElements() /
+                      inputs[0]->getOutputShape(0).dim(0)});
+  auto reshape = createOp<ops::ReshapeOp>(inputs[0]->getOutput(0), shape0);
+
+  std::vector<IODescriptor> descriptors;
+  descriptors.push_back(reshape[0]->getOutput(0));
+  descriptors.push_back(inputs[1]->getOutput(0));
+  descriptors.push_back(inputs[2]->getOutput(0));
+
+  return createOp<ops::GemmOp>(reshape[0]->getOutput(0), inputs[1]->getOutput(0),
+                               inputs[2]->getOutput(0), transA, transB, alpha, beta);
 }
 } // namespace nnc
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
index 3fe75d3..ebb0b2c 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp
@@ -36,6 +36,7 @@
 #include "core/modelIR/operations/ElementwiseOp.h"
 #include "core/modelIR/operations/EluOp.h"
 #include "core/modelIR/operations/FullyConnectedOp.h"
+#include "core/modelIR/operations/GemmOp.h"
 #include "core/modelIR/operations/GatherOp.h"
 #include "core/modelIR/operations/PadOp.h"
 #include "core/modelIR/operations/PoolOp.h"
@@ -207,6 +208,10 @@ void ModelAnalyzer::visit(ops::FullyConnectedOp& op) {
   addOpDescr(&op, "fullConnect");
 }
+void ModelAnalyzer::visit(ops::GemmOp& op) {
+  addOpDescr(&op, "gemm");
+}
+
 void ModelAnalyzer::visit(ops::CappedReluOp& op) {
   addOpDescr(&op, "cappedRelu");
 }
diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.h b/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
index a9ce551..36cb487 100644
--- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
+++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.h
@@ -102,6 +102,7 @@ public:
   void visit(mir::ops::EluOp& op) override;
   void visit(mir::ops::FullyConnectedOp& op) override;
   void visit(mir::ops::GatherOp& op) override;
+  void visit(mir::ops::GemmOp& op) override;
   void visit(mir::ops::PadOp& op) override;
   void visit(mir::ops::PoolOp& op) override;
   void visit(mir::ops::ReduceFOp& op) override;
diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.cpp b/contrib/nnc/passes/soft_backend/SBSerializer.cpp
index c2241ff..29bbe5a 100644
--- a/contrib/nnc/passes/soft_backend/SBSerializer.cpp
+++ b/contrib/nnc/passes/soft_backend/SBSerializer.cpp
@@ -28,6 +28,7 @@
 #include "core/modelIR/operations/SoftmaxOp.h"
 #include "core/modelIR/operations/PoolOp.h"
 #include "core/modelIR/operations/FullyConnectedOp.h"
+#include "core/modelIR/operations/GemmOp.h"
 #include "core/modelIR/operations/CappedReluOp.h"
 #include "core/modelIR/operations/BiasAddOp.h"
 #include "core/modelIR/operations/ReluOp.h"
@@ -229,6 +230,10 @@ void Serializer::visit(ops::FullyConnectedOp& op) {
   serializeShape(op.getOutputShape(0));
 }
+void Serializer::visit(ops::GemmOp& op) {
+  _curOp->_paramStartOffset = _buffer.size();
+}
+
 void Serializer::visit(ops::CappedReluOp& op) {
   _curOp->_paramStartOffset = _buffer.size();
   serializeT(op.getCap());
@@ -245,7 +250,7 @@ void Serializer::visit(ops::VariableOp& op) {
 void Serializer::visit(ops::ConstantOp& op) {
   _curOp->_paramStartOffset = _buffer.size();
-  serializeTensor(op.getValue());
+  serializeTensor(*op.getValue());
 }
 void Serializer::visit(ops::ReluOp& op) {
diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.h b/contrib/nnc/passes/soft_backend/SBSerializer.h
index 748db4e..2e68ebf 100644
--- a/contrib/nnc/passes/soft_backend/SBSerializer.h
+++ b/contrib/nnc/passes/soft_backend/SBSerializer.h
@@ -54,6 +54,7 @@ public:
 void visit(mir::ops::EluOp& op)
override; void visit(mir::ops::FullyConnectedOp& op) override; void visit(mir::ops::GatherOp& op) override; + void visit(mir::ops::GemmOp& op) override; void visit(mir::ops::PadOp& op) override; void visit(mir::ops::PoolOp& op) override; void visit(mir::ops::ReduceFOp& op) override; diff --git a/contrib/nnc/tests/interpreter/graph_creator.cpp b/contrib/nnc/tests/interpreter/graph_creator.cpp index a23ac4e..1bf3a58 100644 --- a/contrib/nnc/tests/interpreter/graph_creator.cpp +++ b/contrib/nnc/tests/interpreter/graph_creator.cpp @@ -19,6 +19,7 @@ #include "core/modelIR/operations/VariableOp.h" #include "core/modelIR/operations/FullyConnectedOp.h" +#include "core/modelIR/operations/GemmOp.h" #include "core/modelIR/operations/Conv2DOp.h" #include "core/modelIR/operations/DepthwiseConv2DOp.h" #include "core/modelIR/operations/PoolOp.h" -- 2.7.4
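In this patch the interpreter's Gemm kernel is still a stub (it only allocates the
output tensor); the computation itself is promised for the next patch. For readers
unfamiliar with the operator, the sketch below shows the reference semantics that
kernel will eventually have to implement: Y = alpha * op(A) * op(B) + beta * C, with
optional transposes and a unidirectionally broadcastable C. It is a minimal,
self-contained illustration in plain C++ with made-up names (referenceGemm, row-major
flat vectors); it deliberately does not use the nnc Tensor/TensorVariant API.

#include <cassert>
#include <cstddef>
#include <vector>

// Reference GEMM: y = alpha * op(a) * op(b) + beta * c, row-major storage.
// a has dims (a_rows, a_cols), b has dims (b_rows, b_cols); op() applies the
// optional transpose, so op(a) is (m, k) and op(b) is (k, n).
std::vector<float> referenceGemm(const std::vector<float>& a, size_t a_rows, size_t a_cols,
                                 const std::vector<float>& b, size_t b_rows, size_t b_cols,
                                 const std::vector<float>& c,  // size n or m * n
                                 bool trans_a, bool trans_b, float alpha, float beta) {
  const size_t m = trans_a ? a_cols : a_rows;
  const size_t k = trans_a ? a_rows : a_cols;
  const size_t n = trans_b ? b_rows : b_cols;
  assert((trans_b ? b_cols : b_rows) == k && "inner dimensions must match");
  assert(a.size() == a_rows * a_cols && b.size() == b_rows * b_cols);
  assert(c.size() == n || c.size() == m * n);

  std::vector<float> y(m * n, 0.0f);
  for (size_t i = 0; i < m; ++i) {
    for (size_t j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (size_t p = 0; p < k; ++p) {
        const float a_elem = trans_a ? a[p * a_cols + i] : a[i * a_cols + p];
        const float b_elem = trans_b ? b[j * b_cols + p] : b[p * b_cols + j];
        acc += a_elem * b_elem;
      }
      // C is broadcast over the rows when it is a plain length-n vector.
      const float c_elem = (c.size() == n) ? c[j] : c[i * n + j];
      y[i * n + j] = alpha * acc + beta * c_elem;
    }
  }
  return y;
}

For instance, a (2, 3) by (3, 4) call with a length-4 c produces a (2, 4) result,
matching the shape rule asserted in GemmOp::inferOutputShapes().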