From 91fd88a1a5a88b39703a7c358297065bb43aac70 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=91=D0=B0=D1=80?= =?utf8?q?=D0=B0=D0=BD=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2/AI=20Tools=20Lab=20/S?= =?utf8?q?RR/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Wed, 16 Jan 2019 12:23:45 +0300 Subject: [PATCH] [nnc] Make FullyConnectedOp treat the second tensor as ordinary argument (#2858) * Change the signature of FullyConnectedOp to identically handle both input parameters. * Refactor uses of FullyConnectedOp. Signed-off-by: Sergei Barannikov --- contrib/nnc/core/modelIR/IrDotDumper.cpp | 3 +- .../core/modelIR/operations/FullyConnectedOp.cpp | 2 +- .../core/modelIR/operations/FullyConnectedOp.h | 10 +--- .../passes/acl_soft_backend/AclCppOpGenerator.cpp | 12 +++-- .../passes/caffe2_frontend/caffe2_op_creator.cpp | 4 +- .../nnc/passes/caffe_frontend/caffe_op_creator.cpp | 18 ++++--- .../nnc/passes/caffe_frontend/caffe_op_creator.h | 2 +- contrib/nnc/passes/interpreter/Interpreter.cpp | 8 +-- .../nnc/passes/interpreter/ops/FullyConnected.cpp | 47 +++++++++++++++++- .../nnc/passes/interpreter/ops/FullyConnected.h | 58 ++++------------------ contrib/nnc/passes/soft_backend/SBSerializer.cpp | 1 - .../soft_backend/code_snippets/cpp_operations.def | 11 ++-- .../passes/tflite_frontend/tflite_op_creator.cpp | 8 +-- contrib/nnc/tests/interpreter/gen/gen_test_data.py | 2 +- contrib/nnc/tests/interpreter/graph_creator.cpp | 2 +- .../interpreter/test_data/test_description.txt | 4 +- contrib/nnc/unittests/acl_backend/MIRToDOM.cpp | 7 +-- .../nnc/unittests/soft_backend/CPPOperations.cpp | 17 ++++--- 18 files changed, 109 insertions(+), 107 deletions(-) diff --git a/contrib/nnc/core/modelIR/IrDotDumper.cpp b/contrib/nnc/core/modelIR/IrDotDumper.cpp index 5097718..d469c69 100644 --- a/contrib/nnc/core/modelIR/IrDotDumper.cpp +++ b/contrib/nnc/core/modelIR/IrDotDumper.cpp @@ -123,8 +123,7 @@ void IrDotDumper::visit(ops::DepthwiseConv2DOp& op) { void IrDotDumper::visit(ops::FullyConnectedOp& op) { auto nodeInfo = DotIrNodeInfo().withType("FullyConnected", op.getName()) .withInShapes(getInputShapes(op)) - .withOutShapes(getOutputShapes(op)) - .withKernelShape(op.getWeights().getShape()); + .withOutShapes(getOutputShapes(op)); dotBuilder.updateWithOp(&op, nodeInfo); } diff --git a/contrib/nnc/core/modelIR/operations/FullyConnectedOp.cpp b/contrib/nnc/core/modelIR/operations/FullyConnectedOp.cpp index c4ffa6b..576f57f 100644 --- a/contrib/nnc/core/modelIR/operations/FullyConnectedOp.cpp +++ b/contrib/nnc/core/modelIR/operations/FullyConnectedOp.cpp @@ -22,7 +22,7 @@ namespace ops { void FullyConnectedOp::inferOutputShapes() { auto& input_shape = getInputShape(0); - auto& weights_shape = getWeights().getShape(); + auto& weights_shape = getInputShape(1); auto input_rank = input_shape.rank(); auto weights_rank = weights_shape.rank(); diff --git a/contrib/nnc/include/core/modelIR/operations/FullyConnectedOp.h b/contrib/nnc/include/core/modelIR/operations/FullyConnectedOp.h index 33b3b92..dcef052 100644 --- a/contrib/nnc/include/core/modelIR/operations/FullyConnectedOp.h +++ b/contrib/nnc/include/core/modelIR/operations/FullyConnectedOp.h @@ -26,21 +26,15 @@ namespace ops { class FullyConnectedOp : public Operation { public: - FullyConnectedOp(const IODescriptor& arg, const TensorVariant& weights) - : Operation(Type::fullyConnected, {arg}), _weights(weights) { + FullyConnectedOp(const IODescriptor& arg1, const IODescriptor& arg2) + : Operation(Type::fullyConnected, {arg1, arg2}) { inferOutputShapes(); } - const TensorVariant& getWeights() const { return _weights; } - private: void inferOutputShapes(); - - TensorVariant _weights; }; - - } // namespace ops } // namespace mir } // namespace nnc diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index 5a79e93..51a9542 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -335,12 +335,16 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) { } void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) { - const TensorVariant ir_weights = transposeTensor<1, 0>(op.getWeights()); - const Shape& ir_weights_shape = ir_weights.getShape(); - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 1); + assert(prev_nodes.size() == 2); + auto in_op = prev_nodes[0].op; + auto ir_weights_op = dynamic_cast(prev_nodes[1].op); + if (ir_weights_op == nullptr) + throw AclCppException("Unsupported operation type"); + + const TensorVariant ir_weights = transposeTensor<1, 0>(ir_weights_op->getValue()); + const Shape& ir_weights_shape = ir_weights.getShape(); // Get the input node tensor id in the DOM. auto in = AF::id(tensorName(in_op)); diff --git a/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp b/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp index 690d21b..585e649 100644 --- a/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp +++ b/contrib/nnc/passes/caffe2_frontend/caffe2_op_creator.cpp @@ -358,8 +358,8 @@ Caffe2OpCreator::convertFullyConnected(const std::vector& inputs, // Transform input into 2-D tensor by flattening axes Shape shape{input_shape.dim(0), input_shape.numElements() / input_shape.dim(0)}; auto reshape = createOp(inputs[0], shape); - auto result = createOp(reshape->getOutput(0), weights_tensor); - + auto weights = createOp(weights_tensor)->getOutput(0); + auto result = createOp(reshape->getOutput(0), weights); auto bias = createOp(mir_tensors.at(op.input(2)))->getOutput(0); result = createOp(result->getOutput(0), bias); return {result->getOutput(0)}; diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp index d681d06..72d0a33 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp +++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp @@ -106,10 +106,10 @@ CaffeOpCreator::createSplit(mir::IODescriptor arg, int32_t num_parts, int32_t ax /// @brief Helper function for creating FullyConnected operation with non-square input. IODescriptor CaffeOpCreator::createFullyConnected(const mir::IODescriptor& input, - const mir::TensorVariant& weights, + const mir::IODescriptor& weights, int32_t axis) { const auto& input_shape = input.op->getOutputShape(input.index); - const auto& weights_shape = weights.getShape(); + const auto& weights_shape = weights.op->getOutputShape(weights.index); assert(axis >= 0 && axis < input_shape.rank()); assert(weights_shape.rank() == 2); @@ -306,11 +306,12 @@ std::vector CaffeOpCreator::convertInnerProduct(const LayerParameter& layer, const std::vector& inputs) { const auto& params = layer.inner_product_param(); - auto weights = convertBlob(layer.blobs(0)); + auto weights_tensor = convertBlob(layer.blobs(0)); if (!params.transpose()) - weights = transposeTensor<1, 0>(weights); + weights_tensor = transposeTensor<1, 0>(weights_tensor); + auto weights = createOp("", weights_tensor)->getOutput(0); auto result = createFullyConnected(inputs[0], weights, params.axis()); // Add the bias, if any. @@ -706,9 +707,12 @@ CaffeOpCreator::convertLSTM(const caffe::LayerParameter& layer, const int32_t hidden_size = params.num_output(); // Learned parameters of the layer. Tensors are transposed to match the ModelIR. - const auto& xw = transposeTensor<1, 0>(convertBlob(layer.blobs(0))); - auto xb = createOp("", convertBlob(layer.blobs(1)))->getOutput(0); - const auto& hw = transposeTensor<1, 0>(convertBlob(layer.blobs(2))); + auto xw_tensor = transposeTensor<1, 0>(convertBlob(layer.blobs(0))); + auto xb_tensor = convertBlob(layer.blobs(1)); + auto hw_tensor = transposeTensor<1, 0>(convertBlob(layer.blobs(2))); + auto xw = createOp("", xw_tensor)->getOutput(0); + auto xb = createOp("", xb_tensor)->getOutput(0); + auto hw = createOp("", hw_tensor)->getOutput(0); // Add a dummy dimension so that element-wise operations perform properly. cont = createOp("", cont, Shape{seq_length, batch_size, 1})->getOutput(0); diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h index b18bfd1..e6e171b 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h +++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h @@ -138,7 +138,7 @@ private: mir::IODescriptor createFullyConnected(const mir::IODescriptor& input, - const mir::TensorVariant& weights, + const mir::IODescriptor& weights, int32_t axis); TensorVariant convertBlob(const caffe::BlobProto& blob); diff --git a/contrib/nnc/passes/interpreter/Interpreter.cpp b/contrib/nnc/passes/interpreter/Interpreter.cpp index b50bc8e..4946c10 100644 --- a/contrib/nnc/passes/interpreter/Interpreter.cpp +++ b/contrib/nnc/passes/interpreter/Interpreter.cpp @@ -208,9 +208,11 @@ void NNInterpreter::visit(ops::PoolOp& op) { } void NNInterpreter::visit(ops::FullyConnectedOp& op) { - auto operand = op.getPrevNodes()[0]; - TensorVariant input = var(operand.op->getId())[operand.index]; - var(op.getId()) = FullyConnected(input, op)(); + auto operand1 = op.getPrevNodes()[0]; + auto operand2 = op.getPrevNodes()[1]; + TensorVariant input1 = var(operand1.op->getId())[operand1.index]; + TensorVariant input2 = var(operand2.op->getId())[operand2.index]; + var(op.getId()) = FullyConnected(input1, input2, op)(); } void NNInterpreter::visit(ops::GemmOp& op) { diff --git a/contrib/nnc/passes/interpreter/ops/FullyConnected.cpp b/contrib/nnc/passes/interpreter/ops/FullyConnected.cpp index 1c4741a..684561c 100644 --- a/contrib/nnc/passes/interpreter/ops/FullyConnected.cpp +++ b/contrib/nnc/passes/interpreter/ops/FullyConnected.cpp @@ -15,5 +15,48 @@ */ #include "FullyConnected.h" -//Do not remove -//Used to force compile FullyConnected.h + +namespace nnc { + +FullyConnected::FullyConnected(const mir::TensorVariant& input, + const mir::TensorVariant& weights, + const mir::ops::FullyConnectedOp& _op) + : _op(_op), _input(input), _weights(weights) {} + +std::vector FullyConnected::operator()() { + mir::TensorVariant res = OperationImpl::allocate_tensor(_op.getOutputShape(0)); + mir::Tensor accessor(res); + + const mir::Shape& in_shape = _input.getShape(); + int32_t in_rank = in_shape.rank(); + + const mir::Shape& w_shape = _weights.getShape(); + int32_t w_rank = w_shape.rank(); + + assert(in_shape.dim(in_rank - 1) == w_shape.dim(w_rank - 2)); + (void)in_rank; + + mir::ShapeRange out_range(res.getShape()); + + int32_t len = w_shape.dim(w_rank - 2); + + for (auto& out_index : out_range) { + mir::Index t_index = out_index; + float& output_element = accessor.at(out_index); + int32_t col = t_index.at(w_rank - 1); + int32_t row = t_index.at(w_rank - 2); + for (int32_t i = 0; i < len; ++i) { + t_index.at(w_rank - 1) = i; + float in = _input.at(t_index); + t_index.at(w_rank - 1) = col; + t_index.at(w_rank - 2) = i; + float w = _weights.at(t_index); + t_index.at(w_rank - 2) = row; + output_element += in * w; + } + } + + return {res}; +} + +} diff --git a/contrib/nnc/passes/interpreter/ops/FullyConnected.h b/contrib/nnc/passes/interpreter/ops/FullyConnected.h index 7688ac6..b651298 100644 --- a/contrib/nnc/passes/interpreter/ops/FullyConnected.h +++ b/contrib/nnc/passes/interpreter/ops/FullyConnected.h @@ -21,60 +21,20 @@ #include "core/modelIR/operations/FullyConnectedOp.h" #include "OperationImpl.h" -namespace nnc -{ +namespace nnc { -template -class FullyConnected : public OperationImpl -{ +class FullyConnected : public OperationImpl { public: - FullyConnected(const mir::TensorVariant &_input, const mir::ops::FullyConnectedOp &_op) : _op(_op), _input(_input) {} + FullyConnected(const mir::TensorVariant& input, + const mir::TensorVariant& weights, + const mir::ops::FullyConnectedOp& _op); - std::vector operator()() override - { - mir::TensorVariant res = OperationImpl::allocate_tensor(_op.getOutputShape(0)); - mir::Tensor accessor(res); - - mir::ShapeRange outRange(res.getShape()); - - mir::Tensor weights(_op.getWeights()); - const mir::Shape &wShape = weights.getShape(); - int32_t wRank = wShape.rank(); - - const mir::Shape &inShape = _input.getShape(); - int32_t inRank = inShape.rank(); - - assert(inShape.dim(inRank - 1) == wShape.dim(wRank - 2)); - (void)inRank; - - const auto len = wShape.dim(wRank - 2); - - int32_t row; - int32_t col; - for (auto &outIdx : outRange) - { - mir::Index tIdx = outIdx; - T& outputElement = accessor.at(outIdx); - col = tIdx.at(wRank - 1); - row = tIdx.at(wRank - 2); - for (int32_t i = 0; i < len; ++i) - { - tIdx.at(wRank - 1) = i; - const T& in = _input.at(tIdx); - tIdx.at(wRank - 1) = col; - tIdx.at(wRank - 2) = i; - const T& w = weights.at(tIdx); - tIdx.at(wRank - 2) = row; - outputElement += w * in; - } - } - - return {res}; - } + std::vector operator()() override; private: - const mir::ops::FullyConnectedOp &_op; - const mir::Tensor _input; + const mir::ops::FullyConnectedOp& _op; + const mir::Tensor _input; + const mir::Tensor _weights; }; } // namespace nnc diff --git a/contrib/nnc/passes/soft_backend/SBSerializer.cpp b/contrib/nnc/passes/soft_backend/SBSerializer.cpp index 98a5a5d..0b3dadd 100644 --- a/contrib/nnc/passes/soft_backend/SBSerializer.cpp +++ b/contrib/nnc/passes/soft_backend/SBSerializer.cpp @@ -216,7 +216,6 @@ void Serializer::visit(ops::PoolOp& op) { void Serializer::visit(ops::FullyConnectedOp& op) { _curOp->_paramStartOffset = _buffer.size(); - serializeTensor(op.getWeights()); serializeShape(op.getOutputShape(0)); } diff --git a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def index 527e558..8ab0889 100644 --- a/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def +++ b/contrib/nnc/passes/soft_backend/code_snippets/cpp_operations.def @@ -422,17 +422,12 @@ void avgPool(Tensor &out, const char *params, const Tensor &in) genericPool(AveragePool, out, params, in); } -void fullConnect(Tensor &out, const char *params, const Tensor &in) -{ - const float *input = in.getData(); - Dims<4> input_d = shapeToDims(in.getShape()); - Kernel kernel = deserializeKernel(params); +void fullConnect(Tensor& out, const char* params, const Tensor& in, const Tensor& w) { Shape out_s = deserializeShape(params); - out.reShape(out_s); - FullyConnected(input, input_d, - kernel.data, kernel.dims, + FullyConnected(in.getData(), shapeToDims(in.getShape()), + w.getData(), shapeToDims(w.getShape()), out.getData(), shapeToDims(out_s)); } diff --git a/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp b/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp index 77916d7..32007c9 100644 --- a/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp +++ b/contrib/nnc/passes/tflite_frontend/tflite_op_creator.cpp @@ -321,10 +321,10 @@ TFLiteOpCreator::convertFullyConnected(const std::vector& inp // Add Reshape operation to make sure the input for FC operation has shape [1, fc_input_size] int32_t fc_input_size = params[0].getShape().dim(0); auto flatten = createOp(inputs[0], Shape{1, fc_input_size}); - - auto result = createOp(flatten->getOutput(0), params[0]); - auto bias = createOp(params[1]); - result = createOp(result->getOutput(0), bias->getOutput(0)); + auto weights = createOp(params[0])->getOutput(0); + auto result = createOp(flatten->getOutput(0), weights); + auto bias = createOp(params[1])->getOutput(0); + result = createOp(result->getOutput(0), bias); return {addFusedActivation(result->getOutput(0), opts->fused_activation_function())}; } diff --git a/contrib/nnc/tests/interpreter/gen/gen_test_data.py b/contrib/nnc/tests/interpreter/gen/gen_test_data.py index 0c154e0..68c6455 100644 --- a/contrib/nnc/tests/interpreter/gen/gen_test_data.py +++ b/contrib/nnc/tests/interpreter/gen/gen_test_data.py @@ -17,7 +17,7 @@ from opinfo import PoolType # 'axis' for CAPPED_RELU is not an error, it just denotes a numeric parameter. OP_FORMATS = { - 'FULLY_CONNECTED': ('kernels',), + 'FULLY_CONNECTED': (), 'CONV_2D': ('kernels', 'padType', 'shapes'), 'DEPTHWISE_CONV_2D': ('kernels', 'padType', 'shapes'), 'POOL_2D': ('padType', 'poolType', 'shapes'), diff --git a/contrib/nnc/tests/interpreter/graph_creator.cpp b/contrib/nnc/tests/interpreter/graph_creator.cpp index e1c9e20..d2deaf5 100644 --- a/contrib/nnc/tests/interpreter/graph_creator.cpp +++ b/contrib/nnc/tests/interpreter/graph_creator.cpp @@ -42,7 +42,7 @@ using namespace nnc::mir; static Operation* createFullyConnected(std::unique_ptr& g, const std::vector& inputs, const opinfo::OperatorInfo* opInfo) { - return g->create("y", inputs[0], *getKernel(opInfo)); + return g->create("y", inputs[0], inputs[1]); } static Operation* createConv2D(std::unique_ptr& g, diff --git a/contrib/nnc/tests/interpreter/test_data/test_description.txt b/contrib/nnc/tests/interpreter/test_data/test_description.txt index ab4b2b6..de85ccd 100644 --- a/contrib/nnc/tests/interpreter/test_data/test_description.txt +++ b/contrib/nnc/tests/interpreter/test_data/test_description.txt @@ -3,8 +3,8 @@ # For example: [2, 3, 4]=RANDOM, [3, 4, 5]=NEAR_ZERO etc. FULLY_CONNECTED -[3, 3] [3, 3] -[5, 10] [10, 3] +[[3, 3] [3, 3]] +[[5, 10] [10, 3]] CONV_2D # input shape: [height, width, in_channels] diff --git a/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp b/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp index 3a94b36..0dfdae2 100644 --- a/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp +++ b/contrib/nnc/unittests/acl_backend/MIRToDOM.cpp @@ -370,11 +370,12 @@ TEST(acl_backend_mir_to_dom, fully_connected) { const int32_t out_size = 7; Shape input_shape_data{1, in_size}; Shape weights_shape{in_size, out_size}; - TensorVariant weights = createTensorVariant(weights_shape); + TensorVariant weights_tensor = createTensorVariant(weights_shape); Graph g; - OpConstructor opGenerator = [weights](Graph& g, const vector& inputs) { - return g.create("fc", inputs[0], weights); + OpConstructor opGenerator = [weights_tensor](Graph& g, const vector& inputs) { + auto weights = g.create("", weights_tensor)->getOutput(0); + return g.create("fc", inputs[0], weights); }; fillGraph(g, opGenerator, {input_shape_data}); diff --git a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp index 1a32faf..4234bc0 100644 --- a/contrib/nnc/unittests/soft_backend/CPPOperations.cpp +++ b/contrib/nnc/unittests/soft_backend/CPPOperations.cpp @@ -673,16 +673,17 @@ TEST(cpp_operations_test, depthwise_conv) { TEST(cpp_operations_test, fully_connected) { vector input_shape_data{3, 13}; - mir::Shape weights_shape{13, 7}; - vector> input_ntensors(1); - Tensor input_atensor; - fillTensors(input_ntensors[0], input_atensor, input_shape_data, 1.0f); - mir::TensorVariant weights = createNTensor(weights_shape, 1.0f); - auto op_generator = [&weights](mir::Graph& g, const std::vector& inputs) { - return g.create("y", inputs[0], weights); + vector weights_shape_data{13, 7}; + vector> input_ntensors(2); + Tensor input_atensor0; + Tensor input_atensor1; + fillTensors(input_ntensors[0], input_atensor0, input_shape_data, 1.0f); + fillTensors(input_ntensors[1], input_atensor1, weights_shape_data, 1.0f); + auto op_generator = [](mir::Graph& g, const std::vector& inputs) { + return g.create("y", inputs[0], inputs[1]); }; - createAndRunTestGraph(op_generator, fullConnect, input_ntensors, input_atensor); + createAndRunTestGraph(op_generator, fullConnect, input_ntensors, input_atensor0, input_atensor1); } TEST(cpp_operations_test, resize_NN_test) { -- 2.7.4