From: Efimov Alexander/AI Tools Lab/./Samsung Electronics Date: Fri, 30 Nov 2018 17:36:50 +0000 (+0300) Subject: [nnc] Support batch axis in acl backend (#2462) X-Git-Tag: nncc_backup~1222 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=31e852b44047ff49fdb5797587131170e856a243;p=platform%2Fcore%2Fml%2Fnnfw.git [nnc] Support batch axis in acl backend (#2462) - Support batch axis - Add more checks on supported data formats - Add assert in transposeShape and related fixes Signed-off-by: Efimov Alexander --- diff --git a/contrib/nnc/include/core/modelIR/TensorUtil.h b/contrib/nnc/include/core/modelIR/TensorUtil.h index d289b16..0685670 100644 --- a/contrib/nnc/include/core/modelIR/TensorUtil.h +++ b/contrib/nnc/include/core/modelIR/TensorUtil.h @@ -34,6 +34,9 @@ namespace mir template Shape transposeShape(const Shape& shape) { std::vector permutes{Ints...}; + + assert(permutes.size() == shape.rank()); + Shape result(shape); int32_t nof_permutes = std::min(shape.rank(), permutes.size()); diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index 06aa194..c04d3c1 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -81,13 +81,14 @@ const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) { } void AclCppOpGenerator::visit(ops::ConcatOp& op) { - static const char* axis_names[] = {"arm_compute::DataLayoutDimension::HEIGHT", + static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES", + "arm_compute::DataLayoutDimension::HEIGHT", "arm_compute::DataLayoutDimension::WIDTH", "arm_compute::DataLayoutDimension::CHANNEL"}; int axis = op.getAxis() < 0 ? op.getOutputShape(0).rank() + op.getAxis() : op.getAxis(); assert(axis < sizeof(axis_names) / sizeof(const char*)); - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0))); auto prefix = out->name() + "_concatenate_layer"; auto inputs_var = _constrBlock->var("std::vector", prefix + "_inputs"); auto inputs = inputs_var->use(); @@ -206,7 +207,7 @@ static shared_ptr genPadStrideInfo(const Oper& op, const string& prefix, ArtifactBlock* block) { using AF = ArtifactFactory; - const Shape& strides = transposeShape<1, 0>(op.getStrides()); + const Shape& strides = transposeShape<1, 0, 2>(op.getStrides()); assert(strides.rank() == 3 && strides.dim(2) == 1); // array of paddings @@ -273,7 +274,7 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) { pooling_type = "arm_compute::PoolingType::AVG"; break; default: - assert(false && "Not a supported pooling type"); + throw AclCppException("Unsupported pooling type"); } auto& prev_nodes = op.getPrevNodes(); @@ -281,7 +282,7 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) { auto in_op = prev_nodes[0].op; auto in = AF::id(tensorName(in_op)); - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0))); auto prefix = out->name() + "_pooling_layer"; auto pad_stride_info_var = genPadStrideInfo(op, prefix, _constrBlock); @@ -314,7 +315,10 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) { auto in = AF::id(tensorName(in_op)); // Create the output tensor in the DOM. - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + const Shape& out_shape = op.getOutputShape(0); + if (out_shape.rank() != 2) + throw AclCppException("Unsupported number of dimensions in fc layer"); + auto out = genTensor(op, transposeShape<1, 0>(out_shape)); string operation_name = out->name() + "_fully_connected_layer"; // Create the weights tensor in the DOM and use its id. @@ -346,7 +350,22 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { auto in = AF::id(tensorName(in_op)); // Create the output tensor in the DOM and obtain its identifier. - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + const Shape& out_shape = op.getOutputShape(0); + Shape transposed_shape; + switch (out_shape.rank()) { + case 4: + transposed_shape = transposeShape<2, 1, 3, 0>(out_shape); + break; + case 2: + transposed_shape = transposeShape<1, 0>(out_shape); + break; + case 1: + transposed_shape = out_shape; + break; + default: + throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank())); + } + shared_ptr out = genTensor(op, transposed_shape); // Prefix used for the name of variables related to the operation implementation. string operation_name = out->name() + "_bias_add_layer"; @@ -363,10 +382,10 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { ir_biases_shape = transposeShape<1, 0>(op.getInputShape(0)); } else { // ACL CLArithmeticAddition supports input tensors broadcasting. - for (int i = 0; i < ir_input_shape.rank() - 1; ++i) + for (int i = 0; i < ir_input_shape.rank(); ++i) ir_biases_shape.dim(i) = 1; - ir_biases_shape.dim(-1) = ir_biases.getShape().dim(0); + ir_biases_shape.dim(2) = ir_biases.getShape().dim(0); } auto biases = genTensor(operation_name + "_biases", ir_biases_shape); @@ -382,7 +401,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { } void AclCppOpGenerator::visit(ops::VariableOp& op) { - auto tensor = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + auto tensor = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0))); allocate(tensor); } @@ -404,7 +423,31 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) { auto in = AF::id(tensorName(in_op)); // Create the output tensor in the DOM and return its id. - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + const Shape& out_shape = op.getOutputShape(0); + + // This check confirms that we can "safely" reshape data + // The only safe configuration of output shape is (1...1, N, 1 ... 1) + bool found_non_one = false; + for (int32_t i = 0; i < out_shape.rank(); ++i) { + if (out_shape.dim(i) != 1) { + if (found_non_one) + throw AclCppException("Unsupported result of reshape"); + found_non_one = true; + } + } + + Shape transposed_shape; + switch (out_shape.rank()) { + case 2: + transposed_shape = transposeShape<1, 0>(out_shape); + break; + case 1: + transposed_shape = out_shape; + break; + default: + throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank())); + } + shared_ptr out = genTensor(op, transposed_shape); // Create an instance of the CLReshapeLayer class as a member of the artifact class. auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer", @@ -423,22 +466,38 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) { // Get input tensor identifier in the generated artifact. auto in = AF::id(tensorName(in_op)); - // Generate output tensor description in the DOM. - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + // Create the output tensor in the DOM and obtain its identifier. + const Shape& out_shape = op.getOutputShape(0); + Shape transposed_shape; + switch (out_shape.rank()) { + case 4: + transposed_shape = transposeShape<2, 1, 3, 0>(out_shape); + break; + case 2: + transposed_shape = transposeShape<1, 0>(out_shape); + break; + case 1: + transposed_shape = out_shape; + break; + default: + throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank())); + } + shared_ptr out = genTensor(op, transposed_shape); + auto operation_name = out->name() + "_scale_layer"; const auto& ir_scales = op.getWeights(); // Reshape the IR scales tensor and generate the corresponding DOM tensor. Shape ir_scales_shape; - const auto ir_input_shape = transposeShape<1, 0, 2>(op.getInputShape(0)); + const auto ir_input_shape = transposeShape<2, 1, 3, 0>(op.getInputShape(0)); ir_scales_shape.resize(ir_input_shape.rank()); // ACL CLArithmeticDivision supports input tensors broadcasting. - for (int i = 0; i < ir_input_shape.rank() - 1; ++i) + for (int i = 0; i < ir_input_shape.rank(); ++i) ir_scales_shape.dim(i) = 1; - ir_scales_shape.dim(-1) = ir_scales.getShape().dim(0); + ir_scales_shape.dim(2) = ir_scales.getShape().dim(0); auto scales = genTensor(operation_name + "_scales", ir_scales_shape); // We do not use the genMultiplication() function here because the input needs broadcasting. @@ -558,7 +617,7 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons auto in = AF::id(tensorName(in_op)); // Create the output tensor in the DOM. - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0))); string operation_name = out->name() + suffix; // Generate a tensor for weights (kernel) in the DOM. @@ -600,7 +659,7 @@ void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& act auto in = AF::id(tensorName(in_op)); // Create the output tensor in the DOM and return its id. - auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0))); + auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0))); auto prefix = out->name() + "_activation_layer"; // Create an instance of the ActivationLayerInfo class as a local variable in the artifact diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp index 952a039..e62edcc 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp +++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp @@ -151,7 +151,7 @@ static int getAxisValue(const OptsType& opts) { // axis 1 represents channels in caffe, in Model ir it is second dimension for now if (axis == 1) - return 2; + return 3; return axis; } @@ -400,15 +400,11 @@ CaffeOpCreator::convertPooling(const std::vector& inputs, std::vector CaffeOpCreator::convertSoftmax(const std::vector& inputs, const caffe::SoftmaxParameter& opts) { + assert(inputs.size() == 1); auto input = inputs[0]; auto& input_shape = input.op->getOutputShape(input.index); // Workaround until we've got Transpose operation. assert(input_shape.rank() == 4 || input_shape.rank() == 2); - if (input_shape.rank() == 4) { - assert(input_shape.dim(0) == 1); - Shape new_shape{input_shape.dim(1), input_shape.dim(2), input_shape.dim(3)}; - input = createOp(input, new_shape)->getOutput(0); - } auto softmax = createOp(input, getAxisValue(opts)); return {softmax->getOutput(0)}; }