g->accept(this);
// Generate all the deferred entities.
- genNamed();
+ genNamed(g);
genPersistentTensorAllocations();
genDeserializations();
genFillings();
}
// Generates ACL code for a Concat operation: pushes the DOM ids of all input
// tensors into a std::vector and instantiates a CLConcatenateLayer that
// writes into the output tensor. Refactored from Operation-pointer access
// (getPrevNodes()[i].op) to IODescriptor-based input/output access.
void AclCppOpGenerator::visit(ops::ConcatOp& op) {
+ const auto& ir_inputs = op.getPrevNodes();
+ IODescriptor ir_output = op.getOutput(0);
+
// Maps the MIR concat axis onto ACL's named data-layout dimensions.
// NOTE(review): the array initializer and the assert message below are
// truncated in this view (the WIDTH entry and the assert itself are elided).
static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES",
                                   "arm_compute::DataLayoutDimension::CHANNEL",
                                   "arm_compute::DataLayoutDimension::HEIGHT",
"axis outside this range is not supported in ACL");
const char* axis_name = axis_names[axis];
- auto out = genTensor(op, op.getOutputShape(0));
+ auto out = genTensor(ir_output);
auto prefix = out->name() + "_concatenate_layer";
// The generated artifact collects raw ICLTensor pointers for the layer input.
auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
auto inputs = inputs_var->use();
- for (auto i : op.getPrevNodes())
- _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(i.op)))}, inputs);
+ for (IODescriptor ir_input : ir_inputs)
+ _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(ir_input)))}, inputs);
auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
                      {inputs, AF::ref(out), AF::lit(axis_name)});
}
// Generates ACL code for a Softmax operation. CLSoftmaxLayer only works along
// a specific axis, so tensors may need auxiliary reshapes; only tensors with
// at most one axis of extent > 1 are supported (see the rank check below).
void AclCppOpGenerator::visit(ops::SoftmaxOp& op) {
- auto& in_ops = op.getPrevNodes();
- assert(in_ops.size() == 1);
- auto in_op = in_ops[0].op;
- auto in = AF::id(tensorName(in_op));
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
+
+ auto in = AF::id(tensorName(ir_input));
- int rank = op.getOutputShape(0).rank();
+ int rank = ir_output.getShape().rank();
// CLPermute does not support all kinds of permutations now.
// rank can be more than 2 in our models, so we can not use CLTranspose.
// This means we can support tensors with no more than one axis > 1.
int nof_long_axes = 0;
for (int i = 0; i < rank; ++i) {
- if (op.getOutputShape(0).dim(i) > 1)
+ if (ir_output.getShape().dim(i) > 1)
++nof_long_axes;
}
// NOTE(review): the guard condition (presumably `if (nof_long_axes > 1)`)
// is elided in this view; the throw is not actually unconditional.
throw AclCppException("Unsupported Softmax operation with several dimensions greater than 1");
// Create the output tensor.
- const Shape& in_out_shape = op.getOutputShape(0);
-
- shared_ptr<ArtifactId> output = genTensor(op, in_out_shape);
+ shared_ptr<ArtifactId> output = genTensor(ir_output);
auto layer_name_prefix = output->name();
if (axis == 0) {
// Then we need two tensors for intermediate results. This is because we do a couple of auxiliary
// reshapes: one to transform the input tensor to a unidimensional tensor and the second to
// transform the result of the softmax operation back to the original form.
- Shape sm_shape(in_out_shape);
+ Shape sm_shape(ir_output.getShape());
// Swap the softmax axis with the innermost one for the intermediate shape.
std::swap(sm_shape.dim(axis), sm_shape.dim(-1));
}
// Generates ACL code for a Pool operation. MIR tensors are NHWC while the
// generated ACL pooling runs in NCHW, so the input is transposed to NCHW
// before the layer and the result is held in an auxiliary transposed tensor.
void AclCppOpGenerator::visit(ops::PoolOp& op) {
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
+
const char* pooling_type = nullptr;
switch (op.getPoolingType()) {
// NOTE(review): the max/avg cases are elided in this view; only the
// default (unsupported) branch is visible.
default:
throw AclCppException("Unsupported pooling type");
}
-
- auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 1);
- auto in_op = prev_nodes[0].op;
- string in_name = tensorName(in_op);
+ string in_name = tensorName(ir_input);
auto in_id = AF::id(in_name);
- const string output_tensor_name = tensorName(&op);
+ const string output_tensor_name = tensorName(ir_output);
// Transpose data from MIR format to format compatible with ACL
// NOTE(review): name lacks the "_" separator used by the other visitors
// ("_transposed_input") — probably a typo, confirm before renaming.
const string transposed_input_name = output_tensor_name + "transposed_input";
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), in_id);
+ genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), in_id);
const string layer_name = output_tensor_name + "_pooling_layer";
shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
// Generate auxiliary tensor to hold transposed output of pool in NCHW format
- Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0));
+ Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output.getShape());
shared_ptr<ArtifactId> transposed_output =
    genTensor(layer_name + "_out_transpose", transposed_output_shape);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
-
- if (op.getNextNodes().empty())
- _outputs.insert(&op);
}
// Generates ACL code for a FullyConnected operation. The weights must be
// produced by a ConstantOp; they are transposed from MIR to ACL order
// (transposeTensor<1, 0>) before being serialized into the artifact.
void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
- auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 2);
+ assert(op.getNumInputs() == 2);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_weights = op.getInput(1);
+ IODescriptor ir_output = op.getOutput(0);
- auto in_op = prev_nodes[0].op;
- auto ir_weights_op = dynamic_cast<mir::ops::ConstantOp*>(prev_nodes[1].op);
+ auto ir_weights_op = dynamic_cast<mir::ops::ConstantOp*>(ir_weights.op);
if (ir_weights_op == nullptr)
throw AclCppException("Unsupported operation type");
- const TensorVariant ir_weights = transposeTensor<1, 0>(ir_weights_op->getValue());
+ const TensorVariant ir_weights_tensor = transposeTensor<1, 0>(ir_weights_op->getValue());
// BUGFIX: take the shape of the *transposed* weights tensor. After the
// rename above, `ir_weights` is the IODescriptor, so `ir_weights.getShape()`
// would silently return the untransposed shape (dims swapped). The parallel
// genConvolution code already uses ir_weights_tensor.getShape().
- const Shape& ir_weights_shape = ir_weights.getShape();
+ const Shape& ir_weights_shape = ir_weights_tensor.getShape();
// Get the input node tensor id in the DOM.
- auto in = AF::id(tensorName(in_op));
+ auto in = AF::id(tensorName(ir_input));
// Create the output tensor in the DOM.
- const Shape& out_shape = op.getOutputShape(0);
- if (out_shape.rank() != 2)
+ if (ir_output.getShape().rank() != 2)
throw AclCppException("Unsupported number of dimensions in fc layer");
- auto out = genTensor(op, out_shape);
+ auto out = genTensor(ir_output);
string operation_name = out->name() + "_fully_connected_layer";
// Create the weights tensor in the DOM and use its id.
addToPersistentTensors(weights);
// Serialize the weights tensor and generate the function to deserialize it in the artifact.
- serializeTensor(weights, ir_weights);
+ serializeTensor(weights, ir_weights_tensor);
addToPersistentTensors(out);
genLayerExecution(layer);
}
}
// Generates ACL code for a BiasAdd operation via CLArithmeticAddition with a
// broadcast bias tensor. The second input (named `ir_weights` to match the
// shared visitor convention) holds the rank-1 bias constant.
void AclCppOpGenerator::visit(ops::BiasAddOp& op) {
- auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 2);
- auto in_op = prev_nodes[0].op;
- auto ir_biases_op = dynamic_cast<ops::ConstantOp*>(prev_nodes[1].op);
- if (ir_biases_op == nullptr)
+ assert(op.getNumInputs() == 2);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_weights = op.getInput(1);
+ IODescriptor ir_output = op.getOutput(0);
+
+ auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
+ if (ir_weights_op == nullptr)
throw AclCppException("Unsupported operation type");
- const auto& ir_biases = ir_biases_op->getValue();
- assert(ir_biases.getShape().rank() == 1);
+ const auto& ir_weights_tensor = ir_weights_op->getValue();
+ assert(ir_weights_tensor.getShape().rank() == 1);
// Get the input node tensor id in the DOM.
- shared_ptr<ArtifactId> input = AF::id(tensorName(in_op));
+ shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
- const string output_tensor_name = tensorName(&op);
+ const string output_tensor_name = tensorName(ir_output);
shared_ptr<ArtifactId> transposed_input;
Shape transposed_output_shape;
shared_ptr<ArtifactId> transposed_output;
// Create the output tensor in the DOM and obtain its identifier.
- const Shape& out_shape = op.getOutputShape(0);
+ const Shape& out_shape = ir_output.getShape();
const string transposed_output_name = output_tensor_name + "_transposed_output";
// NOTE(review): the `case 4:` label and its trailing `break;` appear elided
// in this view — the first group of statements is the rank-4 (NHWC) path.
switch (out_shape.rank()) {
// transpose input to NCHW format supported by ACL
const string transposed_input_name = output_tensor_name + "_transposed_input";
transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
- transposed_input = genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input);
+ transposed_input = genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), input);
transposed_output =
    genTensor(transposed_output_name, transposed_output_shape);
case 1:
// Rank-1 tensors need no layout transpose: operate on the tensors directly.
transposed_output_shape = out_shape;
transposed_input = input;
- transposed_output = genTensor(tensorName(&op), out_shape);
+ transposed_output = genTensor(tensorName(ir_output), out_shape);
break;
default:
throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
string layer_name = transposed_output->name() + "_bias_add_layer";
// Reshape the IR biases tensor and generate the corresponding DOM tensor.
- const auto ir_input_shape = op.getInputShape(0);
+ const auto& ir_input_shape = ir_input.getShape();
Shape ir_biases_shape(ir_input_shape.rank());
// ACL CLArithmeticAddition supports input tensors broadcasting.
// Shape the bias as (1, C, 1, ..., 1) so it broadcasts over the input.
for (int i = 0; i < ir_input_shape.rank(); ++i)
ir_biases_shape.dim(i) = 1;
- ir_biases_shape.dim(1) = ir_biases.getShape().dim(0);
+ ir_biases_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
auto biases = genTensor(layer_name + "_biases", ir_biases_shape);
// Instantiate the CLArithmeticAddition object.
addToPersistentTensors(biases);
// Save the IR biases tensor to later read this in the artifact.
- serializeTensor(biases, ir_biases);
+ serializeTensor(biases, ir_weights_tensor);
genTensorAllocation(_infBlock, transposed_output);
genLayerExecution(layer);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
}
-
- if (op.getNextNodes().empty())
- _outputs.insert(&op);
}
// Generates the DOM tensor for a graph input and registers it as persistent
// so it stays allocated for the lifetime of the generated artifact.
void AclCppOpGenerator::visit(ops::InputOp& op) {
shared_ptr<ArtifactId> tensor;
- tensor = genTensor(op, op.getOutputShape(0));
+ tensor = genTensor(op.getOutput(0));
addToPersistentTensors(tensor);
}
// Generates a persistent DOM tensor for a constant and serializes its value
// into the artifact — but only when shouldSerializeConstant() says so
// (constants consumed inline by other visitors are skipped here).
void AclCppOpGenerator::visit(ops::ConstantOp& op) {
if (shouldSerializeConstant(op)) {
- Shape out_shape = op.getOutputShape(0);
TensorVariant data = op.getValue();
- shared_ptr<ArtifactId> out = genTensor(op, out_shape);
+ shared_ptr<ArtifactId> out = genTensor(op.getOutput(0));
addToPersistentTensors(out);
serializeTensor(out, data);
}
}
// Generates ACL code for a Reshape operation via CLReshapeLayer. Only output
// shapes of the form (1...1, N, 1...1) are accepted (see the elided check).
void AclCppOpGenerator::visit(ops::ReshapeOp& op) {
- auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 1);
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
// Get the id of the input tensor in the generated artifact.
- auto in_op = prev_nodes[0].op;
- auto in = AF::id(tensorName(in_op));
+ auto in = AF::id(tensorName(ir_input));
// Create the output tensor in the DOM and return its id.
- const Shape& out_shape = op.getOutputShape(0);
+ const Shape& out_shape = ir_output.getShape();
// This check confirms that we can "safely" reshape data
// The only safe configuration of output shape is (1...1, N, 1 ... 1)
// NOTE(review): the loop performing this check is elided in this view.
}
}
- shared_ptr<ArtifactId> out = genTensor(op, out_shape);
+ shared_ptr<ArtifactId> out = genTensor(ir_output);
// Create an instance of the CLReshapeLayer class as a member of the artifact class.
auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
// Generates ACL code for a Scale operation. The second input (named
// `ir_weights` per the shared visitor convention) holds the rank-1 scale
// factors, broadcast over the input via CLArithmeticDivision against a
// unit tensor (see below).
void AclCppOpGenerator::visit(ops::ScaleOp& op) {
// May be not a perfect implementation, using the CLPixelWiseMultiplication ACL function taking
// two input tensors with the same shapes.
- auto prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 2);
- auto in_op = prev_nodes[0].op;
- auto ir_scales_op = dynamic_cast<ops::ConstantOp*>(prev_nodes[1].op);
- if (ir_scales_op == nullptr)
+ assert(op.getNumInputs() == 2);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_weights = op.getInput(1);
+ IODescriptor ir_output = op.getOutput(0);
+
+ auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
+ if (ir_weights_op == nullptr)
throw AclCppException("Unsupported operation type");
- const auto& ir_scales = ir_scales_op->getValue();
- assert(ir_scales.getShape().rank() == 1);
+ const auto& ir_weights_tensor = ir_weights_op->getValue();
+ assert(ir_weights_tensor.getShape().rank() == 1);
// Get input tensor identifier in the generated artifact.
- auto input = AF::id(tensorName(in_op));
+ auto input = AF::id(tensorName(ir_input));
- const string output_tensor_name = tensorName(&op);
+ const string output_tensor_name = tensorName(ir_output);
// transpose input to NCHW format supported by ACL
const string transposed_input_name = output_tensor_name + "_transposed_input";
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input);
+ genTransposeMIRtoACL(transposed_input_name, ir_input.getShape(), input);
// Create the output tensor in the DOM and obtain its identifier.
- const Shape& out_shape = op.getOutputShape(0);
+ const Shape& out_shape = ir_output.getShape();
Shape transposed_output_shape;
// NOTE(review): the body of `case 4:` and the remaining labels are elided.
switch (out_shape.rank()) {
case 4:
auto operation_name = transposed_output->name() + "_scale_layer";
// Reshape the IR scales tensor and generate the corresponding DOM tensor.
- const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(op.getInputShape(0));
+ const Shape ir_input_shape = transposeShape<0, 3, 1, 2>(ir_input.getShape());
Shape ir_scales_shape(ir_input_shape.rank());
// ACL CLArithmeticDivision supports input tensors broadcasting.
// Shape the scales as (1, C, 1, ..., 1) so they broadcast over the input.
for (int i = 0; i < ir_input_shape.rank(); ++i)
ir_scales_shape.dim(i) = 1;
- ir_scales_shape.dim(1) = ir_scales.getShape().dim(0);
+ ir_scales_shape.dim(1) = ir_weights_tensor.getShape().dim(0);
auto scales = genTensor(operation_name + "_scales", ir_scales_shape);
// We do not use the genMultiplication() function here because the input needs broadcasting.
addToPersistentTensors(scales);
// Save the IR scales tensor to later read this in the artifact.
- serializeTensor(scales, ir_scales);
+ serializeTensor(scales, ir_weights_tensor);
addToPersistentTensors(unit);
// Fill the unit tensor with the 1 value.
fillTensor(unit, "1");
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
-
- if (op.getNextNodes().empty())
- _outputs.insert(&op);
}
// No ACL code is generated for SliceOp (empty visitor body).
void AclCppOpGenerator::visit(mir::ops::SliceOp&) {
}
// Dropout at inference time generates no computation: the output tensor is
// declared in the artifact as a CLTensor& initialized with the input, i.e.
// an alias rather than a copy.
void AclCppOpGenerator::visit(ops::DropoutOp& op) {
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
+
// Just copy input tensor to the output one.
- auto prev_ops = op.getPrevNodes();
- assert(prev_ops.size() == 1);
- Operation* in_op = prev_ops[0].op;
// Get input tensor identifier in the generated artifact.
- std::shared_ptr<ArtifactId> in = AF::id(tensorName(in_op));
+ shared_ptr<ArtifactId> in = AF::id(tensorName(ir_input));
// Generate output tensor description in the DOM.
- shared_ptr<ArtifactId> out = AF::id(tensorName(&op));
+ shared_ptr<ArtifactId> out = AF::id(tensorName(ir_output));
_constrBlock->var("arm_compute::CLTensor&", out->name(), {}, {in});
}
}
// Generates ACL code for an n-ary Elementwise operation by folding the inputs
// pairwise left-to-right: each iteration combines the running result `in1`
// with the next input; only the final iteration writes into the real output.
void AclCppOpGenerator::visit(ops::ElementwiseOp& op) {
+ assert(op.getNumInputs() >= 2);
+ const auto& ir_inputs = op.getPrevNodes();
+ IODescriptor ir_output = op.getOutput(0);
+
// Create the output tensor in the DOM and obtain its identifier.
- auto out = genTensor(op, op.getOutputShape(0));
+ auto out = genTensor(ir_output);
addToPersistentTensors(out);
- auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() >= 2);
-
- auto in_op1 = prev_nodes[0].op;
-
// Get the identifier of the first input tensor in the DOM.
- auto in1 = AF::id(tensorName(in_op1));
+ auto in1 = AF::id(tensorName(ir_inputs[0]));
- for (size_t i = 1; i < prev_nodes.size(); ++i) {
- auto in_op2 = prev_nodes[i].op;
+ for (size_t i = 1; i < ir_inputs.size(); ++i) {
+ IODescriptor ir_input = ir_inputs[i];
// Get the identifier of the second input tensor in the DOM.
- auto in2 = AF::id(tensorName(in_op2));
+ auto in2 = AF::id(tensorName(ir_input));
// Chaining the partial results of binary operations.
// On the last iteration the result is saved in the node output.
// Different ACL layers used to implement different types of elementwise operations.
switch (op.getOpType()) {
case ops::ElementwiseOp::OpType::mul:
- in1 = genMultiplication(out->name() + "_" + "multiplication", i - 1, op.getInputShape(i),
- in1, in2, i == prev_nodes.size() - 1 ? out : nullptr);
+ in1 = genMultiplication(out->name() + "_" + "multiplication", i - 1, ir_input.getShape(),
+                         in1, in2, i == ir_inputs.size() - 1 ? out : nullptr);
break;
case ops::ElementwiseOp::OpType::add:
- in1 = genAddition(out->name() + "_" + "addition", i - 1, op.getInputShape(i),
- in1, in2, i == prev_nodes.size() - 1 ? out : nullptr);
+ in1 = genAddition(out->name() + "_" + "addition", i - 1, ir_input.getShape(),
+                   in1, in2, i == ir_inputs.size() - 1 ? out : nullptr);
break;
default:
throw AclCppException("This min elementwise operation is currently not supported");
}
// Generates ACL code for a Pad operation (CLPadLayer); the parameter
// generation that follows is elided in this view.
void AclCppOpGenerator::visit(ops::PadOp& op) {
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
+
// Get the id of the input tensor.
- auto in_op = op.getPrevNodes()[0].op;
- auto input = AF::id(tensorName(in_op));
+ auto input = AF::id(tensorName(ir_input));
// Create the output tensor in the DOM
- auto out = genTensor(op, op.getOutputShape(0));
+ auto out = genTensor(ir_output);
addToPersistentTensors(out);
// Generate PadLayer params
// Shared generator for convolution-like operations (Op is the concrete MIR
// convolution type). Weights come from a ConstantOp and are transposed from
// MIR HWIO to ACL OIHW order (transposeTensor<3, 2, 0, 1>); input/output are
// transposed between NHWC (MIR) and NCHW (ACL) around the layer.
template <typename Op>
void AclCppOpGenerator::genConvolution(Op& op, const string& acl_func_name, const string& suffix) {
- const auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 2);
// NOTE(review): the original size assert was dropped and getPrevNodes() is
// now indexed unchecked; consider restoring assert(op.getPrevNodes().size() == 2).
+ IODescriptor ir_input = op.getPrevNodes()[0];
+ IODescriptor ir_weights = op.getPrevNodes()[1];
+ IODescriptor ir_output = op.getOutput(0);
- auto in_op = prev_nodes[0].op;
- auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(prev_nodes[1].op);
+ auto ir_weights_op = dynamic_cast<ops::ConstantOp*>(ir_weights.op);
if (ir_weights_op == nullptr)
throw AclCppException("Unsupported operation type");
- auto ir_weights = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue());
- const Shape& ir_weights_shape = ir_weights.getShape();
+ auto ir_weights_tensor = transposeTensor<3, 2, 0, 1>(ir_weights_op->getValue());
+ const Shape& ir_weights_shape = ir_weights_tensor.getShape();
// get output tensor name that is used as base for other names
- const string output_tensor_name = tensorName(&op);
+ const string output_tensor_name = tensorName(ir_output);
// Get the identifier of the input tensor in the DOM.
- auto input = AF::id(tensorName(in_op));
+ auto input = AF::id(tensorName(ir_input));
// Generate auxiliary tensor to hold transposed input of convolution in NCHW format
shared_ptr<ArtifactId> transposed_input =
- genTransposeMIRtoACL(output_tensor_name + "_transposed_input", op.getInputShape(0), input);
+ genTransposeMIRtoACL(output_tensor_name + "_transposed_input", ir_input.getShape(), input);
// Create the transposed output tensor in the DOM.
const string transposed_output_name = output_tensor_name + "_transposed_output";
- Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0));
+ Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(ir_output.getShape());
shared_ptr<ArtifactId> transposed_output =
    genTensor(transposed_output_name, transposed_output_shape);
addToPersistentTensors(weights);
// Save the IR weights tensor to later read this in the artifact.
- serializeTensor(weights, ir_weights);
+ serializeTensor(weights, ir_weights_tensor);
genTensorAllocation(_infBlock, transposed_output);
genLayerExecution(layer);
genTensorDeallocation(_infBlock, transposed_input);
genTensorDeallocation(_infBlock, transposed_output);
-
- if (op.getNextNodes().empty())
- _outputs.insert(&op);
}
// Shared generator for activation operations: emits a CLActivationLayer named
// by `activation_name` with parameters a and b (their meaning depends on the
// ACL activation function selected).
void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& activation_name,
                                      float a, float b) {
- auto &prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 1);
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
// Get the id of the input tensor.
- auto in_op = prev_nodes[0].op;
- auto in = AF::id(tensorName(in_op));
+ auto in = AF::id(tensorName(ir_input));
// Create the output tensor in the DOM and return its id.
- shared_ptr<ArtifactId> output = genTensor(op, op.getOutputShape(0));
+ shared_ptr<ArtifactId> output = genTensor(ir_output);
auto prefix = output->name() + "_activation_layer";
genLayerExecution(layer);
}
// Generates one chained addition step; `index` distinguishes the generated
// layer names. Widened from int to size_t to match the size_t loop counter
// passed from visit(ElementwiseOp) and avoid a sign-conversion warning.
-shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string& prefix, int index,
+shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string& prefix, size_t index,
                                                      const Shape& ir_shape,
                                                      shared_ptr<ArtifactId> in1,
                                                      shared_ptr<ArtifactId> in2,
return out;
}
// Derives the DOM tensor name for an IR tensor: the producing operation's
// name prefixed with '_' and sanitized to [A-Za-z0-9_], or a fallback name
// built from the operation id when the operation is unnamed.
-string AclCppOpGenerator::tensorName(const Operation* op) const {
+string AclCppOpGenerator::tensorName(IODescriptor ir_tensor) const {
string tensor_name;
- if (!op->getName().empty()) {
- tensor_name = "_" + op->getName();
+ // TODO Use the tensor name instead of the operation name.
+ const auto& op_name = ir_tensor.op->getName();
+
+ if (!op_name.empty()) {
+ tensor_name = "_" + op_name;
// Replace every non-alphanumeric character so the name is a valid C++ id.
replace_if(tensor_name.begin(),
           tensor_name.end(),
           [](char c) { return std::isalnum(c) == 0; }, '_');
} else {
- tensor_name = "tensor_" + to_string(op->getId());
+ tensor_name = "tensor_" + to_string(ir_tensor.op->getId());
}
return tensor_name;
// NOTE(review): the lines below belong to an elided genTensor(name, shape)
// overload whose body is not visible in this view.
return id;
}
// Creates a DOM tensor for an IR tensor. The old overload also registered the
// operation in the _inputs/_outputs sets as a side effect; that bookkeeping is
// gone — graph inputs/outputs are now collected from the Graph in genNamed().
-shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(Operation& op, const Shape& ir_shape) {
- if (op.getType() == Operation::Type::input)
- _inputs.insert(&op);
-
- if (op.getNextNodes().empty() && op.getType() != Operation::Type::constant)
- _outputs.insert(&op);
-
- return genTensor(tensorName(&op), ir_shape, !op.getName().empty());
+shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(IODescriptor ir_tensor) {
+ // The last flag marks whether the tensor carries a user-given name.
+ return genTensor(tensorName(ir_tensor), ir_tensor.getShape(), !ir_tensor.op->getName().empty());
}
// Emits convenience accessors on the artifact class: getInput()/getOutput()
// returning a CLTensor&, generated only when the graph has exactly one input
// resp. one output. Now queries the Graph directly instead of the removed
// _inputs/_outputs member sets.
-void AclCppOpGenerator::genNamed() {
- if (_inputs.size() == 1) {
+void AclCppOpGenerator::genNamed(Graph* graph) {
+ const auto& inputs = graph->collectInputs();
+ if (inputs.size() == 1) {
auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getInput");
auto b = f->getBlock();
- auto id = AF::id(tensorName(*_inputs.begin()));
+ // The input operation's first output tensor is the graph input tensor.
+ auto id = AF::id(tensorName(inputs[0]->getOutput(0)));
b->ret(id);
}
- if (_outputs.size() == 1) {
+ const auto& outputs = graph->collectOutputs();
+ if (outputs.size() == 1) {
auto f = _artifactClass->func(true, "arm_compute::CLTensor&", "getOutput");
auto b = f->getBlock();
- auto id = AF::id(tensorName(*_outputs.begin()));
+ auto id = AF::id(tensorName(outputs[0]->getOutput(0)));
b->ret(id);
}
}
}
void AclCppOpGenerator::visit(mir::ops::TransposeOp& op) {
- auto& prev_nodes = op.getPrevNodes();
- assert(prev_nodes.size() == 1);
- auto in_op = prev_nodes[0].op;
+ assert(op.getNumInputs() == 1);
+ IODescriptor ir_input = op.getInput(0);
+ IODescriptor ir_output = op.getOutput(0);
// Get the input node tensor id in the DOM.
- shared_ptr<ArtifactId> input = AF::id(tensorName(in_op));
+ shared_ptr<ArtifactId> input = AF::id(tensorName(ir_input));
const vector<size_t>& mir_axis_order = op.getAxisOrder();
// Create the output tensor in the DOM.
- if (op.getOutputShape(0).rank() != 4)
+ if (ir_output.getShape().rank() != 4)
throw AclCppException("Unsupported number of dimensions in transpose operation");
// TODO replace transpose shape
- shared_ptr<ArtifactId> output = genTensor(op, op.getOutputShape(0));
+ shared_ptr<ArtifactId> output = genTensor(ir_output);
// Actual generation of operation and related stuff
genTranspose(input, output, mir_axis_order, false);