From: Сергей Баранников/AI Tools Lab /SRR/Engineer/삼성전자 Date: Mon, 28 Jan 2019 10:24:38 +0000 (+0300) Subject: [nnc] Prepare the ACL backend for the future changes related to adding of the Tensor... X-Git-Tag: nncc_backup~920 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d022bc48f9ba7b33e560d37e08f90c6eca53e6fb;p=platform%2Fcore%2Fml%2Fnnfw.git [nnc] Prepare the ACL backend for the future changes related to adding of the Tensor class in the ModelIR (#2933) * Change the signature of `tensorName` and `genTensor` methods to accept `IODescriptor` instead of `Operation`. * Use the `collectInputs` and `collectOutputs` methods of Graph class instead of collecting input and output operations manually in the ACL backend. * Add `getInput` method to `Operation` class. * Add uses of `getInput` method in the ACL backend. Signed-off-by: Sergei Barannikov --- diff --git a/contrib/nnc/include/core/modelIR/Operation.h b/contrib/nnc/include/core/modelIR/Operation.h index 86c7b90..9794ccc 100644 --- a/contrib/nnc/include/core/modelIR/Operation.h +++ b/contrib/nnc/include/core/modelIR/Operation.h @@ -56,6 +56,11 @@ public: std::size_t getNumInputs() const { return _num_inputs; } std::size_t getNumOutputs() const { return _num_outputs; } + IODescriptor getInput(std::size_t index) { + assert(index < _inputs.size()); + return _inputs[index]; + } + const IODescriptor getOutput(std::size_t index); const std::vector& getPrevNodes() const { return _inputs; } diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp index edb2733..9b745f3 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.cpp @@ -92,7 +92,7 @@ const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) { g->accept(this); // Generate all the deferred entities. - genNamed(); + genNamed(g); genPersistentTensorAllocations(); genDeserializations(); genFillings(); @@ -104,6 +104,9 @@ const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) { } void AclCppOpGenerator::visit(ops::ConcatOp& op) { + const auto& ir_inputs = op.getPrevNodes(); + IODescriptor ir_output = op.getOutput(0); + static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES", "arm_compute::DataLayoutDimension::CHANNEL", "arm_compute::DataLayoutDimension::HEIGHT", @@ -114,13 +117,13 @@ void AclCppOpGenerator::visit(ops::ConcatOp& op) { "axis outside this range is not supported in ACL"); const char* axis_name = axis_names[axis]; - auto out = genTensor(op, op.getOutputShape(0)); + auto out = genTensor(ir_output); auto prefix = out->name() + "_concatenate_layer"; auto inputs_var = _constrBlock->var("std::vector", prefix + "_inputs"); auto inputs = inputs_var->use(); - for (auto i : op.getPrevNodes()) - _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(i.op)))}, inputs); + for (IODescriptor ir_input : ir_inputs) + _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs); auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix, {inputs, AF::ref(out), AF::lit(axis_name)}); @@ -138,12 +141,13 @@ void AclCppOpGenerator::visit(ops::DepthwiseConv2DOp& op) { } void AclCppOpGenerator::visit(ops::SoftmaxOp& op) { - auto& in_ops = op.getPrevNodes(); - assert(in_ops.size() == 1); - auto in_op = in_ops[0].op; - auto in = AF::id(tensorName(in_op)); + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); + + auto in = AF::id(tensorName(ir_input)); - int rank = op.getOutputShape(0).rank(); + int rank = ir_output.getShape().rank(); // CLPermute does not support all kinds of permutations now. // rank can be more than 2 in our models, so we can not use CLTranspose. // This means we can support tensors with no more then one axis > 1. @@ -152,7 +156,7 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) { int nof_long_axes = 0; for (int i = 0; i < rank; ++i) { - if (op.getOutputShape(0).dim(i) > 1) + if (ir_output.getShape().dim(i) > 1) ++nof_long_axes; } @@ -161,9 +165,7 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) { throw AclCppException("Unsupported Softmax operation with several dimensions greater than 1"); // Create the output tensor. - const Shape& in_out_shape = op.getOutputShape(0); - - shared_ptr output = genTensor(op, in_out_shape); + shared_ptr output = genTensor(ir_output); auto layer_name_prefix = output->name(); if (axis == 0) { @@ -180,7 +182,7 @@ void AclCppOpGenerator::visit(ops::SoftmaxOp& op) { // Then we need two tensors for intermediate results. This is because we do a couple of auxiliary // reshapes: one to transform the input tensor to a unidimensional tensor and the second to // transorm the result of the softmax operation back to the original form. - Shape sm_shape(in_out_shape); + Shape sm_shape(ir_output.getShape()); std::swap(sm_shape.dim(axis), sm_shape.dim(-1)); @@ -266,6 +268,10 @@ AclCppOpGenerator::genTransposeACLtoMIR(const string& name, } void AclCppOpGenerator::visit(ops::PoolOp& op) { + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); + const char* pooling_type = nullptr; switch (op.getPoolingType()) { @@ -278,20 +284,16 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) { default: throw AclCppException("Unsupported pooling type"); } - - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 1); - auto in_op = prev_nodes[0].op; - string in_name = tensorName(in_op); + string in_name = tensorName(ir_input); auto in_id = AF::id(in_name); - const string output_tensor_name = tensorName(&op); + const string output_tensor_name = tensorName(ir_output); // Transpose data from MIR format to format compatible with ACL const string transposed_input_name = output_tensor_name + "transposed_input"; shared_ptr transposed_input = - genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), in_id); + genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), in_id); const string layer_name = output_tensor_name + "_pooling_layer"; @@ -315,7 +317,7 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) { shared_ptr pooling_info = pooling_info_var->use(); // Generate auxiliary tensor to hold transposed output of pool in NCHW format - Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0)); + Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output.getShape()); shared_ptr transposed_output = genTensor(layer_name + "_out_transpose", transposed_output_shape); @@ -330,31 +332,28 @@ void AclCppOpGenerator::visit(ops::PoolOp& op) { genTensorDeallocation(_infBlock, transposed_input); genTensorDeallocation(_infBlock, transposed_output); - - if (op.getNextNodes().empty()) - _outputs.insert(&op); } void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) { - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 2); + assert(op.getNumInputs() == 2); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_weights = op.getInput(1); + IODescriptor ir_output = op.getOutput(0); - auto in_op = prev_nodes[0].op; - auto ir_weights_op = dynamic_cast(prev_nodes[1].op); + auto ir_weights_op = dynamic_cast(ir_weights.op); if (ir_weights_op == nullptr) throw AclCppException("Unsupported operation type"); - const TensorVariant ir_weights = transposeTensor<1, 0>(ir_weights_op->getValue()); + const TensorVariant ir_weights_tensor = transposeTensor<1, 0>(ir_weights_op->getValue()); const Shape& ir_weights_shape = ir_weights.getShape(); // Get the input node tensor id in the DOM. - auto in = AF::id(tensorName(in_op)); + auto in = AF::id(tensorName(ir_input)); // Create the output tensor in the DOM. - const Shape& out_shape = op.getOutputShape(0); - if (out_shape.rank() != 2) + if (ir_output.getShape().rank() != 2) throw AclCppException("Unsupported number of dimensions in fc layer"); - auto out = genTensor(op, out_shape); + auto out = genTensor(ir_output); string operation_name = out->name() + "_fully_connected_layer"; // Create the weights tensor in the DOM and use its id. @@ -366,7 +365,7 @@ void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) { addToPersistentTensors(weights); // Serialize the weights tensor and generate the function to deserialize it in the artifact. - serializeTensor(weights, ir_weights); + serializeTensor(weights, ir_weights_tensor); addToPersistentTensors(out); genLayerExecution(layer); } @@ -380,27 +379,29 @@ void AclCppOpGenerator::visit(ops::CappedReluOp& op) { } void AclCppOpGenerator::visit(ops::BiasAddOp& op) { - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 2); - auto in_op = prev_nodes[0].op; - auto ir_biases_op = dynamic_cast(prev_nodes[1].op); - if (ir_biases_op == nullptr) + assert(op.getNumInputs() == 2); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_weights = op.getInput(1); + IODescriptor ir_output = op.getOutput(0); + + auto ir_weights_op = dynamic_cast(ir_weights.op); + if (ir_weights_op == nullptr) throw AclCppException("Unsupported operation type"); - const auto& ir_biases = ir_biases_op->getValue(); - assert(ir_biases.getShape().rank() == 1); + const auto& ir_weights_tensor = ir_weights_op->getValue(); + assert(ir_weights_tensor.getShape().rank() == 1); // Get the input node tensor id in the DOM. - shared_ptr input = AF::id(tensorName(in_op)); + shared_ptr input = AF::id(tensorName(ir_input)); - const string output_tensor_name = tensorName(&op); + const string output_tensor_name = tensorName(ir_output); shared_ptr transposed_input; Shape transposed_output_shape; shared_ptr transposed_output; // Create the output tensor in the DOM and obtain its identifier. - const Shape& out_shape = op.getOutputShape(0); + const Shape& out_shape = ir_output.getShape(); const string transposed_output_name = output_tensor_name + "_transposed_output"; switch (out_shape.rank()) { @@ -408,7 +409,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { // transpose input to NCHW format supported by ACL const string transposed_input_name = output_tensor_name + "_transposed_input"; transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape); - transposed_input = genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input); + transposed_input = genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), input); transposed_output = genTensor(transposed_output_name, transposed_output_shape); @@ -418,7 +419,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { case 1: transposed_output_shape = out_shape; transposed_input = input; - transposed_output = genTensor(tensorName(&op), out_shape); + transposed_output = genTensor(tensorName(ir_output), out_shape); break; default: throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank())); @@ -428,14 +429,14 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { string layer_name = transposed_output->name() + "_bias_add_layer"; // Reshape the IR biases tensor and generate the corresponding DOM tensor. - const auto ir_input_shape = op.getInputShape(0); + const auto& ir_input_shape = ir_input.getShape(); Shape ir_biases_shape(ir_input_shape.rank()); // ACL CLArithmeticAddition supports input tensors broadcasting. for (int i = 0; i < ir_input_shape.rank(); ++i) ir_biases_shape.dim(i) = 1; - ir_biases_shape.dim(1) = ir_biases.getShape().dim(0); + ir_biases_shape.dim(1) = ir_weights_tensor.getShape().dim(0); auto biases = genTensor(layer_name + "_biases", ir_biases_shape); // Instantiate the CLArithmeticAddition object. @@ -445,7 +446,7 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { addToPersistentTensors(biases); // Save the IR biases tensor to later read this in the artifact. - serializeTensor(biases, ir_biases); + serializeTensor(biases, ir_weights_tensor); genTensorAllocation(_infBlock, transposed_output); genLayerExecution(layer); @@ -457,14 +458,11 @@ void AclCppOpGenerator::visit(ops::BiasAddOp& op) { genTensorDeallocation(_infBlock, transposed_input); genTensorDeallocation(_infBlock, transposed_output); } - - if (op.getNextNodes().empty()) - _outputs.insert(&op); } void AclCppOpGenerator::visit(ops::InputOp& op) { shared_ptr tensor; - tensor = genTensor(op, op.getOutputShape(0)); + tensor = genTensor(op.getOutput(0)); addToPersistentTensors(tensor); } @@ -499,9 +497,8 @@ static bool shouldSerializeConstant(ops::ConstantOp& op) { void AclCppOpGenerator::visit(ops::ConstantOp& op) { if (shouldSerializeConstant(op)) { - Shape out_shape = op.getOutputShape(0); TensorVariant data = op.getValue(); - shared_ptr out = genTensor(op, out_shape); + shared_ptr out = genTensor(op.getOutput(0)); addToPersistentTensors(out); serializeTensor(out, data); } @@ -512,15 +509,15 @@ void AclCppOpGenerator::visit(ops::ReluOp& op) { } void AclCppOpGenerator::visit(ops::ReshapeOp& op) { - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 1); + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); // Get the id of the input tensor in the generated artifact. - auto in_op = prev_nodes[0].op; - auto in = AF::id(tensorName(in_op)); + auto in = AF::id(tensorName(ir_input)); // Create the output tensor in the DOM and return its id. - const Shape& out_shape = op.getOutputShape(0); + const Shape& out_shape = ir_output.getShape(); // This check confirms that we can "safely" reshape data // The only safe configuration of output shape is (1...1, N, 1 ... 1) @@ -533,7 +530,7 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) { } } - shared_ptr out = genTensor(op, out_shape); + shared_ptr out = genTensor(ir_output); // Create an instance of the CLReshapeLayer class as a member of the artifact class. auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer", @@ -545,28 +542,30 @@ void AclCppOpGenerator::visit(ops::ReshapeOp& op) { void AclCppOpGenerator::visit(ops::ScaleOp& op) { // May be not a perfect implementation, using the CLPixelWiseMultiplication ACL function taking // two input tensors with the same shapes. - auto prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 2); - auto in_op = prev_nodes[0].op; - auto ir_scales_op = dynamic_cast(prev_nodes[1].op); - if (ir_scales_op == nullptr) + assert(op.getNumInputs() == 2); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_weights = op.getInput(1); + IODescriptor ir_output = op.getOutput(0); + + auto ir_weights_op = dynamic_cast(ir_weights.op); + if (ir_weights_op == nullptr) throw AclCppException("Unsupported operation type"); - const auto& ir_scales = ir_scales_op->getValue(); - assert(ir_scales.getShape().rank() == 1); + const auto& ir_weights_tensor = ir_weights_op->getValue(); + assert(ir_weights_tensor.getShape().rank() == 1); // Get input tensor identifier in the generated artifact. - auto input = AF::id(tensorName(in_op)); + auto input = AF::id(tensorName(ir_input)); - const string output_tensor_name = tensorName(&op); + const string output_tensor_name = tensorName(ir_output); // transpose input to NCHW format supported by ACL const string transposed_input_name = output_tensor_name + "_transposed_input"; shared_ptr transposed_input = - genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input); + genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), input); // Create the output tensor in the DOM and obtain its identifier. - const Shape& out_shape = op.getOutputShape(0); + const Shape& out_shape = ir_output.getShape(); Shape transposed_output_shape; switch (out_shape.rank()) { case 4: @@ -587,14 +586,14 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) { auto operation_name = transposed_output->name() + "_scale_layer"; // Reshape the IR scales tensor and generate the corresponding DOM tensor. - const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(op.getInputShape(0)); + const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(ir_input.getShape()); Shape ir_scales_shape(ir_input_shape.rank()); // ACL CLArithmeticDivision supports input tensors broadcasting. for (int i = 0; i < ir_input_shape.rank(); ++i) ir_scales_shape.dim(i) = 1; - ir_scales_shape.dim(1) = ir_scales.getShape().dim(0); + ir_scales_shape.dim(1) = ir_weights_tensor.getShape().dim(0); auto scales = genTensor(operation_name + "_scales", ir_scales_shape); // We do not use the genMultiplication() function here because the input needs broadcasting. @@ -619,7 +618,7 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) { addToPersistentTensors(scales); // Save the IR scales tensor to later read this in the artifact. - serializeTensor(scales, ir_scales); + serializeTensor(scales, ir_weights_tensor); addToPersistentTensors(unit); // Fill the unit tensor with the 1 value. fillTensor(unit, "1"); @@ -633,9 +632,6 @@ void AclCppOpGenerator::visit(ops::ScaleOp& op) { genTensorDeallocation(_infBlock, transposed_input); genTensorDeallocation(_infBlock, transposed_output); - - if (op.getNextNodes().empty()) - _outputs.insert(&op); } void AclCppOpGenerator::visit(mir::ops::SliceOp&) { @@ -648,16 +644,17 @@ void AclCppOpGenerator::visit(ops::BatchNormOp&) { } void AclCppOpGenerator::visit(ops::DropoutOp& op) { + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); + // Just copy input tensor to the output one. - auto prev_ops = op.getPrevNodes(); - assert(prev_ops.size() == 1); - Operation* in_op = prev_ops[0].op; // Get input tensor identifier in the generated artifact. - std::shared_ptr in = AF::id(tensorName(in_op)); + shared_ptr in = AF::id(tensorName(ir_input)); // Generate output tensor description in the DOM. - shared_ptr out = AF::id(tensorName(&op)); + shared_ptr out = AF::id(tensorName(ir_output)); _constrBlock->var("arm_compute::CLTensor&", out->name(), {}, {in}); } @@ -667,35 +664,34 @@ void AclCppOpGenerator::visit(ops::TanhOp& op) { } void AclCppOpGenerator::visit(ops::ElementwiseOp& op) { + assert(op.getNumInputs() >= 2); + const auto& ir_inputs = op.getPrevNodes(); + IODescriptor ir_output = op.getOutput(0); + // Create the output tensor in the DOM and obtain its identifier. - auto out = genTensor(op, op.getOutputShape(0)); + auto out = genTensor(ir_output); addToPersistentTensors(out); - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() >= 2); - - auto in_op1 = prev_nodes[0].op; - // Get the identifier of the first input tensor in the DOM. - auto in1 = AF::id(tensorName(in_op1)); + auto in1 = AF::id(tensorName(ir_inputs[0])); - for (size_t i = 1; i < prev_nodes.size(); ++i) { - auto in_op2 = prev_nodes[i].op; + for (size_t i = 1; i < ir_inputs.size(); ++i) { + IODescriptor ir_input = ir_inputs[i]; // Get the identifier of the second input tensor in the DOM. - auto in2 = AF::id(tensorName(in_op2)); + auto in2 = AF::id(tensorName(ir_input)); // Chaining the partial results of binary operations. // On the last iteration the result is saved in the node output. // Different ACL layers used to implement different types of elementwise operations. switch (op.getOpType()) { case ops::ElementwiseOp::OpType::mul: - in1 = genMultiplication(out->name() + "_" + "multiplication", i - 1, op.getInputShape(i), - in1, in2, i == prev_nodes.size() - 1 ? out : nullptr); + in1 = genMultiplication(out->name() + "_" + "multiplication", i - 1, ir_input.getShape(), + in1, in2, i == ir_inputs.size() - 1 ? out : nullptr); break; case ops::ElementwiseOp::OpType::add: - in1 = genAddition(out->name() + "_" + "addition", i - 1, op.getInputShape(i), - in1, in2, i == prev_nodes.size() - 1 ? out : nullptr); + in1 = genAddition(out->name() + "_" + "addition", i - 1, ir_input.getShape(), + in1, in2, i == ir_inputs.size() - 1 ? out : nullptr); break; default: throw AclCppException("This min elementwise operation is currently not supported"); @@ -712,12 +708,15 @@ void AclCppOpGenerator::visit(ops::EluOp&) { } void AclCppOpGenerator::visit(ops::PadOp& op) { + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); + // Get the id of the input tensor. - auto in_op = op.getPrevNodes()[0].op; - auto input = AF::id(tensorName(in_op)); + auto input = AF::id(tensorName(ir_input)); // Create the output tensor in the DOM - auto out = genTensor(op, op.getOutputShape(0)); + auto out = genTensor(ir_output); addToPersistentTensors(out); // Generate PadLayer params @@ -741,30 +740,30 @@ void AclCppOpGenerator::visit(ops::PadOp& op) { template void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, const string& suffix) { - const auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 2); + IODescriptor ir_input = op.getPrevNodes()[0]; + IODescriptor ir_weights = op.getPrevNodes()[1]; + IODescriptor ir_output = op.getOutput(0); - auto in_op = prev_nodes[0].op; - auto ir_weights_op = dynamic_cast(prev_nodes[1].op); + auto ir_weights_op = dynamic_cast(ir_weights.op); if (ir_weights_op == nullptr) throw AclCppException("Unsupported operation type"); - auto ir_weights = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue()); - const Shape& ir_weights_shape = ir_weights.getShape(); + auto ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue()); + const Shape& ir_weights_shape = ir_weights_tensor.getShape(); // get output tensor name that is used as base for other names - const string output_tensor_name = tensorName(&op); + const string output_tensor_name = tensorName(ir_output); // Get the identifier of the input tensor in the DOM. - auto input = AF::id(tensorName(in_op)); + auto input = AF::id(tensorName(ir_input)); // Generate auxiliary tensor to hold transposed input of convolution in NCHW format shared_ptr transposed_input = - genTransposeMIRtoACL(output_tensor_name + "_transposed_input", op.getInputShape(0), input); + genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input.getShape(), input); // Create the transposed output tensor in the DOM. const string transposed_output_name = output_tensor_name + "_transposed_output"; - Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0)); + Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output.getShape()); shared_ptr transposed_output = genTensor(transposed_output_name, transposed_output_shape); @@ -796,7 +795,7 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons addToPersistentTensors(weights); // Save the IR weights tensor to later read this in the artifact. - serializeTensor(weights, ir_weights); + serializeTensor(weights, ir_weights_tensor); genTensorAllocation(_infBlock, transposed_output); genLayerExecution(layer); @@ -806,22 +805,19 @@ void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, cons genTensorDeallocation(_infBlock, transposed_input); genTensorDeallocation(_infBlock, transposed_output); - - if (op.getNextNodes().empty()) - _outputs.insert(&op); } void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& activation_name, float a, float b) { - auto &prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 1); + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); // Get the id of the input tensor. - auto in_op = prev_nodes[0].op; - auto in = AF::id(tensorName(in_op)); + auto in = AF::id(tensorName(ir_input)); // Create the output tensor in the DOM and return its id. - shared_ptr output = genTensor(op, op.getOutputShape(0)); + shared_ptr output = genTensor(ir_output); auto prefix = output->name() + "_activation_layer"; @@ -841,7 +837,7 @@ void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& act genLayerExecution(layer); } -shared_ptr AclCppOpGenerator::genAddition(const string& prefix, int index, +shared_ptr AclCppOpGenerator::genAddition(const string& prefix, size_t index, const Shape& ir_shape, shared_ptr in1, shared_ptr in2, @@ -927,16 +923,19 @@ shared_ptr AclCppOpGenerator::genMultiplication(const string& prefix return out; } -string AclCppOpGenerator::tensorName(const Operation* op) const { +string AclCppOpGenerator::tensorName(IODescriptor ir_tensor) const { string tensor_name; - if (!op->getName().empty()) { - tensor_name = "_" + op->getName(); + // TODO Use the tensor name instead of the operation name. + const auto& op_name = ir_tensor.op->getName(); + + if (!op_name.empty()) { + tensor_name = "_" + op_name; replace_if(tensor_name.begin(), tensor_name.end(), [](char c) { return std::isalnum(c) == 0; }, '_'); } else { - tensor_name = "tensor_" + to_string(op->getId()); + tensor_name = "tensor_" + to_string(ir_tensor.op->getId()); } return tensor_name; @@ -985,28 +984,24 @@ shared_ptr AclCppOpGenerator::genTensor(const string& name, return id; } -shared_ptr AclCppOpGenerator::genTensor(Operation& op, const Shape& ir_shape) { - if (op.getType() == Operation::Type::input) - _inputs.insert(&op); - - if (op.getNextNodes().empty() && op.getType() != Operation::Type::constant) - _outputs.insert(&op); - - return genTensor(tensorName(&op), ir_shape, !op.getName().empty()); +shared_ptr AclCppOpGenerator::genTensor(IODescriptor ir_tensor) { + return genTensor(tensorName(ir_tensor), ir_tensor.getShape(), !ir_tensor.op->getName().empty()); } -void AclCppOpGenerator::genNamed() { - if (_inputs.size() == 1) { +void AclCppOpGenerator::genNamed(Graph* graph) { + const auto& inputs = graph->collectInputs(); + if (inputs.size() == 1) { auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getInput"); auto b = f->getBlock(); - auto id = AF::id(tensorName(*_inputs.begin())); + auto id = AF::id(tensorName(inputs[0]->getOutput(0))); b->ret(id); } - if (_outputs.size() == 1) { + const auto& outputs = graph->collectOutputs(); + if (outputs.size() == 1) { auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getOutput"); auto b = f->getBlock(); - auto id = AF::id(tensorName(*_outputs.begin())); + auto id = AF::id(tensorName(outputs[0]->getOutput(0))); b->ret(id); } } @@ -1137,19 +1132,19 @@ void AclCppOpGenerator::genTranspose(const std::shared_ptr& inp } void AclCppOpGenerator::visit(mir::ops::TransposeOp& op) { - auto& prev_nodes = op.getPrevNodes(); - assert(prev_nodes.size() == 1); - auto in_op = prev_nodes[0].op; + assert(op.getNumInputs() == 1); + IODescriptor ir_input = op.getInput(0); + IODescriptor ir_output = op.getOutput(0); // Get the input node tensor id in the DOM. - shared_ptr input = AF::id(tensorName(in_op)); + shared_ptr input = AF::id(tensorName(ir_input)); const vector& mir_axis_order = op.getAxisOrder(); // Create the output tensor in the DOM. - if (op.getOutputShape(0).rank() != 4) + if (ir_output.getShape().rank() != 4) throw AclCppException("Unsupported number of dimensions in transpose operation"); // TODO replace transpose shape - shared_ptr output = genTensor(op, op.getOutputShape(0)); + shared_ptr output = genTensor(ir_output); // Actual generation of operation and related stuff genTranspose(input, output, mir_axis_order, false); diff --git a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h index 3e02610..6b37ff6 100644 --- a/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h +++ b/contrib/nnc/passes/acl_soft_backend/AclCppOpGenerator.h @@ -135,7 +135,7 @@ private: * to the left of and including the in2 term, or the operation out if in2 was * the last term in the sequence. */ - std::shared_ptr genAddition(const std::string& prefix, int index, + std::shared_ptr genAddition(const std::string& prefix, size_t index, const mir::Shape& ir_shape, std::shared_ptr in1, std::shared_ptr in2, @@ -170,7 +170,7 @@ private: /** * @brief Generates a unique name for the tensor. */ - std::string tensorName(const mir::Operation* op) const; + std::string tensorName(mir::IODescriptor ir_tensor) const; /** * @brief Generates variables tensor shape in DOM. @@ -198,11 +198,10 @@ private: /** * @brief Generates a DOM tensor. - * @param op - an IR operation for which this tensor is generated. - * @param ir_shape - a shape in IR. + * @param ir_tensor - the ModelIR tensor. * @return - a DOM identifier for the created tensor. */ - std::shared_ptr genTensor(mir::Operation& op, const mir::Shape& ir_shape); + std::shared_ptr genTensor(mir::IODescriptor ir_tensor); /** * @brief generate transposing operation, @p mir_perm contains dimensions in MIR order (batch has index 0) @@ -217,8 +216,9 @@ private: /** * @brief Generates accessors for the input/output tensors. + * @param graph - the ModelIR graph. */ - void genNamed(); + void genNamed(mir::Graph* graph); /** * @brief Schedule a tensor serialization. @@ -294,16 +294,6 @@ private: void genLayerExecution(std::shared_ptr layer_id); /** - * @brief Input nodes. - */ - std::set _inputs; - - /** - * @brief Output nodes. - */ - std::set _outputs; - - /** * @brief All named tensors names. */ std::set _tensorNames; diff --git a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp index 1a7a3ca..c403abd 100644 --- a/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp +++ b/contrib/nnc/passes/soft_backend/ModelAnalyzer.cpp @@ -255,12 +255,12 @@ void ModelAnalyzer::visit(ops::BiasAddOp& op) { } void ModelAnalyzer::visit(ops::InputOp& op) { - assert(op.getPrevNodes().empty()); + assert(op.getNumInputs() == 0); appendOperationToInference(&op, "in"); } void ModelAnalyzer::visit(ops::ConstantOp& op) { - assert(op.getPrevNodes().empty()); + assert(op.getNumInputs() == 0); // FIXME This is to work around deserializeTensors not being able to deserialize tensors of type // other than float32. diff --git a/contrib/nnc/unittests/core/operation.cpp b/contrib/nnc/unittests/core/operation.cpp index c2efa74..ad52172 100644 --- a/contrib/nnc/unittests/core/operation.cpp +++ b/contrib/nnc/unittests/core/operation.cpp @@ -31,7 +31,7 @@ TEST(Operation, ConnectionTest) { auto op2 = new ops::ReshapeOp(op1->getOutput(0), Shape{}); op2->setId(1); - ASSERT_EQ(op1->getId(), op2->getPrevNodes()[0].op->getId()); + ASSERT_EQ(op1->getId(), op2->getInput(0).op->getId()); delete op1; delete op2;