From ea5a0925d4573128324bc9c69af4b4a19510ed9b Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=A2=D0=B8=D0=BC=D1=83=D1=80=20=D0=9E=D1=82=D0=B5=D0=BB?= =?utf8?q?=D0=BB=D0=BE=D0=B2=D0=B8=D1=87=20=D0=90=D0=B1=D0=BB=D1=8F=D0=B7?= =?utf8?q?=D0=B8=D0=BC=D0=BE=D0=B2/AI=20Tools=20Lab=20/SRR/Staff=20Enginee?= =?utf8?q?r/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Tue, 13 Nov 2018 19:27:02 +0300 Subject: [PATCH] [nnc] Fix problems found while debugging the Convolution and Softmax operations (#2142) - Tensors and tensor shapes are now transposed in certain places. - Serialization now saves tensor elements starting from the lower dimensions and moving to the higher ones, so that the standard ACL routine can read them easily. Signed-off-by: Timur Ablyazimov --- contrib/nnc/include/core/modelIR/TensorUtil.h | 6 ++ .../passes/acl_soft_backend/AclCppOpGenerator.h | 7 +- .../acl_soft_backend/AclArtifactUtilities.in | 2 +- .../passes/acl_soft_backend/AclCppOpGenerator.cpp | 74 +++++++++++++++------- 4 files changed, 59 insertions(+), 30 deletions(-) diff --git a/contrib/nnc/include/core/modelIR/TensorUtil.h b/contrib/nnc/include/core/modelIR/TensorUtil.h index b49890e..dfbe81c 100644 --- a/contrib/nnc/include/core/modelIR/TensorUtil.h +++ b/contrib/nnc/include/core/modelIR/TensorUtil.h @@ -31,6 +31,12 @@ namespace mir { template +Shape transposeShape(const Shape& shape) { + Shape result{shape.dim(Ints)...}; + return result; +} + +template static std::shared_ptr transposeTensor(std::shared_ptr tensor) { diff --git a/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h index 216f246..1b23d14 100644 --- a/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/include/passes/acl_soft_backend/AclCppOpGenerator.h @@ -166,10 +166,10 @@ private: /** * @brief Generates a DOM tensor. * @param node - node for which this tensor generated. - * @param op - an IR operation for which this tensor is generated. 
+ * @param ir_shape - a shape in IR. * @return - a DOM identifier for the created tensor. */ - std::shared_ptr genTensor(mir::INode* node, mir::OpDescription& op); + std::shared_ptr genTensor(mir::INode* node, const mir::Shape& ir_shape); /** * @brief Generates accessors for the input/output tensors. @@ -178,11 +178,8 @@ private: /** * @brief Serializes a tensor. - * @tparam Ints - transposes to use during serialization. Needed because ACL can use different - * tensor layouts than the model IR. * @param tensor - tensor to serialize. */ - template void serializeTensor(const mir::TensorVariant& tensor); std::set _inputs; diff --git a/contrib/nnc/passes/acl_soft_backend/AclArtifactUtilities.in b/contrib/nnc/passes/acl_soft_backend/AclArtifactUtilities.in index c51a00d..e68bd48 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclArtifactUtilities.in +++ b/contrib/nnc/passes/acl_soft_backend/AclArtifactUtilities.in @@ -24,7 +24,7 @@ static void deserializeTensor(std::istream& par_in, arm_compute::CLTensor& tenso window.use_tensor_dimensions(tensor.info()->tensor_shape()); arm_compute::Iterator iter(&tensor, window); arm_compute::execute_window_loop(window, [&par_in, &iter](const arm_compute::Coordinates&) { - par_in.read(iter.ptr(), sizeof(float)); + par_in.read(reinterpret_cast(iter.ptr()), sizeof(float)); }, iter); tensor.unmap(); diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index 2d5ca59..6f4774e 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -70,7 +70,7 @@ void AclCppOpGenerator::visit(INode* node, ops::ConcatOp& op) { "arm_compute::DataLayoutDimension::BATCHES"}; assert(op.getAxis() < sizeof(axis_names) / sizeof(const char*)); - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); auto prefix = out->name() + "_concatenate_layer"; auto inputs_var = 
_constrBlock->var("std::vector", prefix + "_inputs"); auto inputs = inputs_var->use(); @@ -99,7 +99,7 @@ void AclCppOpGenerator::visit(INode* node, ops::SoftmaxOp& op) { assert(prev_nodes.size() == 1); auto in_node = prev_nodes[0].node; auto in = AF::id(tensorName(in_node)); - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); auto sm_layer_var = _artifactClass->var(false, "arm_compute::CLSoftmaxLayer", out->name() + "_softmax_layer"); auto sm_layer = sm_layer_var->use(); @@ -126,7 +126,7 @@ void AclCppOpGenerator::visit(INode* node, ops::PoolOp& op) { auto in_node = prev_nodes[0].node; auto in = AF::id(tensorName(in_node)); - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); auto prefix = out->name() + "_pooling_layer"; auto pad_stride_info_var = _constrBlock->var("arm_compute::PadStrideInfo", @@ -163,14 +163,14 @@ void AclCppOpGenerator::visit(INode* node, ops::FullyConnectedOp& op) { auto in = AF::id(tensorName(in_node)); // Create the output tensor in the DOM. - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); string operation_name = out->name() + "_fully_connected_layer"; // Create the weights tensor in the DOM and use its id. auto weights = genTensor(operation_name + "_weights", ir_weights_shape); // Serialize the weights tensor and generate the function to deserialize it in the artifact. - serializeTensor<3, 2, 1, 0>(ir_weights); + serializeTensor(ir_weights); _constrBlock->call("deserializeTensor", {_parIn, weights}); // Instantiate the CLFullyConnectedLayer object. @@ -202,7 +202,7 @@ void AclCppOpGenerator::visit(INode* node, ops::BiasAddOp& op) { auto in = AF::id(tensorName(in_node)); // Create the output tensor in the DOM and obtain its identifier. - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); // Prefix used for the name of variables related to the operation implementation. 
string operation_name = out->name() + "_bias_add_layer"; @@ -237,7 +237,9 @@ void AclCppOpGenerator::visit(INode* node, ops::BiasAddOp& op) { } void AclCppOpGenerator::visit(INode* node, ops::VariableOp& op) { - genTensor(node, op); + // Axes order is HWC in the Model IR and WHC in the ACL library, so we are switching the first + // two dimensions. + genTensor(node, transposeShape<1, 0, 2>(op.getOutputShape(0))); } void AclCppOpGenerator::visit(INode* node, ops::ReluOp& op) { @@ -253,7 +255,7 @@ void AclCppOpGenerator::visit(INode* node, ops::ReshapeOp& op) { auto in = AF::id(tensorName(in_node)); // Create the output tensor in the DOM and return its id. - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); // Create an instance of the CLReshapeLayer class as a member of the artifact class. auto reshape_layer_var = _artifactClass->var(false, "arm_compute::CLReshapeLayer", @@ -278,7 +280,7 @@ void AclCppOpGenerator::visit(INode* node, ops::ScaleOp& op) { auto in = AF::id(tensorName(in_node)); // Generate output tensor description in the DOM. - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); auto prefix = out->name() + "_scale_layer"; // Create a CLPixelWiseMultiplication instance. @@ -330,7 +332,7 @@ void AclCppOpGenerator::visit(INode* node, ops::DropoutOp& op) { auto in = AF::id(tensorName(in_node)); // Generate output tensor description in the DOM. - auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); // Create a CLCopy instance. auto copy_layer_var = _artifactClass->var(false, "arm_compute::CLCopy", @@ -350,7 +352,7 @@ void AclCppOpGenerator::visit(INode* node, ops::TanhOp& op) { void AclCppOpGenerator::visit(INode* node, ops::ElementwiseOp& op) { // Create the output tensor in the DOM and obtain its identifier. 
- auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); auto& prev_nodes = node->getPrevNodes(); assert(prev_nodes.size() >= 2); @@ -395,8 +397,8 @@ void AclCppOpGenerator::visit(INode* node, ops::EluOp& op) { template void AclCppOpGenerator::genConvolution(INode* node, Op& op, const string& acl_func_name, const string& suffix) { - const TensorVariant& ir_weights = op.getKernel(); - const Shape& ir_weights_shape = ir_weights.getShape(); + auto ir_weights = transposeTensor<1, 0, 2, 3>(make_shared(op.getKernel())); + const Shape& ir_weights_shape = ir_weights->getShape(); assert(ir_weights_shape.rank() == 4); Shape ir_biases_shape({ir_weights_shape.dim(-1)}); const Shape& strides = op.getStrides(); @@ -413,14 +415,14 @@ void AclCppOpGenerator::genConvolution(INode* node, Op& op, const string& acl_fu auto in = AF::id(tensorName(in_node)); // Create the output tensor in the DOM. - auto out = genTensor(node, op); + auto out = genTensor(node, transposeShape<1, 0, 2>(op.getOutputShape(0))); string operation_name = out->name() + suffix; // Generate a tensor for weights (kernel) in the DOM. auto weights = genTensor(operation_name + "_weights", ir_weights_shape); // Save the IR weights tensor to later read this in the artifact. - serializeTensor<3, 2, 1, 0>(ir_weights); + serializeTensor(*ir_weights); _constrBlock->call("deserializeTensor", {_parIn, weights}); // Create a local variable of type PadStrideInfo in the artifact constructor: @@ -465,7 +467,7 @@ void AclCppOpGenerator::genActivation(INode* node, OpDescription& op, auto in = AF::id(tensorName(in_node)); // Create the output tensor in the DOM and return its id. 
- auto out = genTensor(node, op); + auto out = genTensor(node, op.getOutputShape(0)); auto prefix = out->name() + "_activation_layer"; // Create an instance of the ActivationLayerInfo class as a local variable in the artifact @@ -616,14 +618,14 @@ shared_ptr AclCppOpGenerator::genTensor(const string& name, const Sh return id; } -shared_ptr AclCppOpGenerator::genTensor(INode* node, OpDescription& op) { +shared_ptr AclCppOpGenerator::genTensor(INode* node, const Shape& ir_shape) { if (node->getPrevNodes().empty()) _inputs.insert(node); if (node->getNextNodes().empty()) _outputs.insert(node); - return genTensor(tensorName(node), op.getOutputShape(0), !node->getName().empty()); + return genTensor(tensorName(node), ir_shape, !node->getName().empty()); } void AclCppOpGenerator::genNamed() { @@ -642,14 +644,38 @@ void AclCppOpGenerator::genNamed() { } } -template void AclCppOpGenerator::serializeTensor(const TensorVariant& tensor) { - shared_ptr to_tranpose = make_shared(tensor); - shared_ptr transposed = transposeTensor(to_tranpose); - const Shape& shape = transposed->getShape(); + const Shape& shape = tensor.getShape(); + Index coords; + coords.resize(shape.rank()); + Index dimensions; + dimensions.resize(shape.rank()); + + for (int i = 0; i < shape.rank(); ++i) { + coords.at(i) = 0; + dimensions.at(i) = shape.dim(i); + } + + for (;;) { + _parOut.write(tensor.at(coords), tensor.getElementSize()); + bool stop = true; + int i; - for (auto& idx: ShapeRange(shape)) - _parOut.write(tensor.at(idx), sizeof(float)); + for (i = 0; i < shape.rank(); ++i) { + if(coords.at(i) < dimensions.at(i) - 1) { + ++coords.at(i); + stop = false; + break; + } + } + + if (stop) { + break; + } else { + for (int j = 0; j < i; ++j) + coords.at(j) = 0; + } + } } void AclCppOpGenerator::visit(INode* node, ops::SqueezeOp& op) { -- 2.7.4