#include "core/modelIR/operations/SoftmaxOp.h"
#include "core/modelIR/operations/SqrtOp.h"
#include "core/modelIR/operations/TanhOp.h"
+#include "core/modelIR/operations/TransposeOp.h"
#include "core/modelIR/operations/VariableOp.h"
#include <algorithm>
}
void AclCppOpGenerator::visit(ops::ConcatOp& op) {
- static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES",
- "arm_compute::DataLayoutDimension::HEIGHT",
- "arm_compute::DataLayoutDimension::WIDTH",
- "arm_compute::DataLayoutDimension::CHANNEL"};
-
- int axis = op.getAxis() < 0 ? op.getOutputShape(0).rank() + op.getAxis() : op.getAxis();
- assert(axis < sizeof(axis_names) / sizeof(const char*));
- auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
+
+  int axis = op.getAxis();
+  // NOTE(review): the replaced code normalized negative axes (rank + axis);
+  // this version asserts them away — confirm negative axes can't reach here.
+  assert(axis < 4 && axis >= 0 && "axis outside this range is not supported in ACL");
+
+ const char* axis_name;
+ if (cli::debugTranspose) {
+ static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES",
+ "arm_compute::DataLayoutDimension::CHANNEL",
+ "arm_compute::DataLayoutDimension::HEIGHT",
+ "arm_compute::DataLayoutDimension::WIDTH"};
+ axis_name = axis_names[axis];
+ } else {
+ static const char* axis_names[] = {"arm_compute::DataLayoutDimension::BATCHES",
+ "arm_compute::DataLayoutDimension::HEIGHT",
+ "arm_compute::DataLayoutDimension::WIDTH",
+ "arm_compute::DataLayoutDimension::CHANNEL"};
+ axis_name = axis_names[axis];
+ }
+
+  // NOTE(review): shape uses <3, 2, 1, 0> unconditionally, while the axis-name
+  // table above branches on cli::debugTranspose — verify the non-debug path.
+  auto out = genTensor(op, transposeShape<3, 2, 1, 0>(op.getOutputShape(0)));
auto prefix = out->name() + "_concatenate_layer";
auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
auto inputs = inputs_var->use();
_constrBlock->call("push_back", {AF::ref(AF::id(tensorName(i.op)))}, inputs);
auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
- {inputs, AF::ref(out), AF::lit(axis_names[axis])});
+ {inputs, AF::ref(out), AF::lit(axis_name)});
allocate(out);
- runLayer(layer);
+ genLayerExecution(layer);
}
void AclCppOpGenerator::visit(ops::Conv2DOp& op) {
sm_shape.dim(0) = sm_dim;
}
- auto out = genTensor(op, in_out_shape);
+ Shape transposed_out_shape;
+
+ switch (in_out_shape.rank()) {
+ case 4:
+ transposed_out_shape = transposeShape<3, 2, 1, 0>(in_out_shape);
+ break;
+ case 2:
+ transposed_out_shape = transposeShape<1, 0>(in_out_shape);
+ break;
+ default:
+ throw AclCppException("Unsupported number of dimensions in softmax");
+ }
+
+ auto out = genTensor(op, transposed_out_shape);
auto prefix = out->name();
if (axis == 0) {
auto sm = genLayer("arm_compute::CLSoftmaxLayer", prefix + "_softmax_layer",
{AF::ref(in), AF::ref(out)});
allocate(out);
- runLayer(sm);
+ genLayerExecution(sm);
} else {
// Need to reshape before the Softmax application and after it.
// Then we need two tensors for intermediate results. This is because we do a couple of auxiliary
auto transp1 = genLayer("arm_compute::CLReshapeLayer", prefix + "_transp_layer1",
{AF::ref(in), AF::ref(tmp)});
allocate(tmp);
- runLayer(transp1);
+ genLayerExecution(transp1);
// Apply the softmax operaion.
auto sm = genLayer("arm_compute::CLSoftmaxLayer", prefix + "_softmax_layer",
{AF::ref(tmp), AF::ref(tmp2)});
allocate(tmp2);
- runLayer(sm);
+ genLayerExecution(sm);
// Reshape the output to the original form.
auto transp2 = genLayer("arm_compute::CLReshapeLayer", prefix + "_transp_layer2",
{AF::ref(tmp2), AF::ref(out)});
allocate(out);
- runLayer(transp2);
+ genLayerExecution(transp2);
}
}
return pad_stride_info_var;
}
+/**
+ * @brief Convert a tensor from MIR (NHWC) layout to the NCHW layout ACL kernels expect.
+ * @param name        name of the tensor to generate in the DOM
+ * @param input_shape shape of the input tensor, in MIR dimension order
+ * @param input       DOM id of the tensor to transpose
+ * @return DOM id of the transposed tensor; when cli::debugTranspose is off, no
+ *         transpose is emitted and the result is just a CLTensor& alias of `input`
+ */
+shared_ptr<ArtifactId>
+AclCppOpGenerator::genTransposeMIRtoACL(const string& name,
+                                        const Shape& input_shape,
+                                        const shared_ptr<ArtifactId>& input) {
+
+  if (!cli::debugTranspose) {
+    // Generate output tensor description in the DOM.
+    shared_ptr<ArtifactId> output = AF::id(name);
+
+    _constrBlock->var("arm_compute::CLTensor&", output->name(), {}, {input});
+    return output;
+  }
+  // Presumably transposeShape<2, 1, 3, 0> spells the NCHW result in ACL's
+  // reversed dimension order, matching the {0, 3, 1, 2} MIR permutation passed
+  // to genTranspose below — TODO confirm against transposeShape's definition.
+  Shape transposed_shape = transposeShape<2, 1, 3, 0>(input_shape);
+  shared_ptr<ArtifactId> transposed_id =
+    genTensor(name, transposed_shape, false);
+  genTranspose(input, transposed_id, {0, 3, 1, 2});
+  return transposed_id;
+}
+
+/**
+ * @brief Convert a tensor back from the ACL NCHW layout to the MIR (NHWC) layout.
+ * @param name        name of the tensor to generate in the DOM
+ * @param input_shape shape of the input tensor, in NCHW order
+ * @param input       DOM id of the tensor to transpose
+ * @return DOM id of the transposed tensor; when cli::debugTranspose is off, no
+ *         transpose is emitted and the result is just a CLTensor& alias of `input`
+ */
+shared_ptr<ArtifactId>
+AclCppOpGenerator::genTransposeACLtoMIR(const string& name,
+                                        const Shape& input_shape,
+                                        const shared_ptr<ArtifactId>& input) {
+
+  if (!cli::debugTranspose) {
+    // Generate output tensor description in the DOM.
+    shared_ptr<ArtifactId> output = AF::id(name);
+
+    _constrBlock->var("arm_compute::CLTensor&", output->name(), {}, {input});
+    return output;
+  }
+  // Presumably transposeShape<1, 3, 2, 0> spells the NHWC result in ACL's
+  // reversed dimension order, matching the {0, 2, 3, 1} MIR permutation passed
+  // to genTranspose below — TODO confirm against transposeShape's definition.
+  Shape transposed_shape = transposeShape<1, 3, 2, 0>(input_shape);
+  shared_ptr<ArtifactId> transposed_id =
+    genTensor(name, transposed_shape, false);
+  genTranspose(input, transposed_id, {0, 2, 3, 1});
+  return transposed_id;
+}
+
void AclCppOpGenerator::visit(ops::PoolOp& op) {
const char* pooling_type = nullptr;
assert(prev_nodes.size() == 1);
auto in_op = prev_nodes[0].op;
- auto in = AF::id(tensorName(in_op));
- auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
- auto prefix = out->name() + "_pooling_layer";
+ string in_name = tensorName(in_op);
+ auto in_id = AF::id(in_name);
- auto pad_stride_info_var = genPadStrideInfo(op, prefix, _constrBlock);
+ const string output_tensor_name = tensorName(&op);
- auto pad_stride_info = pad_stride_info_var->use();
- auto kernel_window_var = _constrBlock->var("arm_compute::Size2D", prefix + "_kernel_window", {},
- {AF::lit(to_string(op.getWindowShape().dim(1))),
- AF::lit(to_string(op.getWindowShape().dim(0)))});
- auto kernel_window = kernel_window_var->use();
- auto pooling_info_var = _constrBlock->var(
- "arm_compute::PoolingLayerInfo", prefix + "_pooling_info", {},
+ // Transpose data from MIR format to format compatible with ACL
+  const string transposed_input_name = output_tensor_name + "_transposed_input";
+ shared_ptr<ArtifactId> transposed_input =
+ genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), in_id);
+
+ const string layer_name = output_tensor_name + "_pooling_layer";
+
+ shared_ptr<ArtifactVariable> pad_stride_info_var =
+ genPadStrideInfo(op, layer_name, _constrBlock);
+
+ shared_ptr<ArtifactId> pad_stride_info = pad_stride_info_var->use();
+
+ // Create kernel window info
+ shared_ptr<ArtifactVariable> kernel_window_var =
+ _constrBlock->var("arm_compute::Size2D", layer_name + "_kernel_window", {},
+ {AF::lit(to_string(op.getWindowShape().dim(1))),
+ AF::lit(to_string(op.getWindowShape().dim(0)))});
+ shared_ptr<ArtifactId> kernel_window = kernel_window_var->use();
+
+ // Create pooling info: pooling type, kernel info, strides, etc
+ shared_ptr<ArtifactVariable> pooling_info_var = _constrBlock->var(
+ "arm_compute::PoolingLayerInfo", layer_name + "_pooling_info", {},
{AF::lit(pooling_type), kernel_window, pad_stride_info,
AF::lit(op.getBorderType() == ops::PoolOp::BorderType::EMPTY ? "true" : "false")});
- auto pooling_info = pooling_info_var->use();
- auto layer = genLayer("arm_compute::CLPoolingLayer", prefix,
- {AF::ref(in), AF::ref(out), pooling_info});
- allocate(out);
- runLayer(layer);
+ shared_ptr<ArtifactId> pooling_info = pooling_info_var->use();
+
+ // Generate auxiliary tensor to hold transposed output of pool in NCHW format
+ Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0));
+ shared_ptr<ArtifactId> transposed_output =
+ genTensor(layer_name + "_out_transpose", transposeShape<3, 2, 1, 0>(transposed_output_shape));
+
+ // Actual layer creation
+ shared_ptr<ArtifactId> layer = genLayer("arm_compute::CLPoolingLayer", layer_name,
+ {AF::ref(transposed_input), AF::ref(transposed_output), pooling_info});
+ allocate(transposed_output);
+ genLayerExecution(layer);
+
+ shared_ptr<ArtifactId> output =
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+
+ if (op.getNextNodes().empty())
+ _outputs.insert(&op);
}
void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
// Serialize the weights tensor and generate the function to deserialize it in the artifact.
serializeTensor(weights, ir_weights);
allocate(out);
- runLayer(layer);
+ genLayerExecution(layer);
}
void AclCppOpGenerator::visit(ops::GemmOp& op) {
auto in_op = prev_nodes[0].op;
// Get the input node tensor id in the DOM.
- auto in = AF::id(tensorName(in_op));
+ shared_ptr<ArtifactId> input = AF::id(tensorName(in_op));
+
+ const string output_tensor_name = tensorName(&op);
+
+ shared_ptr<ArtifactId> transposed_input;
+ Shape transposed_output_shape;
+ shared_ptr<ArtifactId> transposed_output;
// Create the output tensor in the DOM and obtain its identifier.
const Shape& out_shape = op.getOutputShape(0);
- Shape transposed_shape;
+ const string transposed_output_name = output_tensor_name + "_transposed_output";
+
switch (out_shape.rank()) {
- case 4:
- transposed_shape = transposeShape<2, 1, 3, 0>(out_shape);
+ case 4: {
+ // transpose input to NCHW format supported by ACL
+ const string transposed_input_name = output_tensor_name + "_transposed_input";
+ transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
+ transposed_input = genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input);
+
+ transposed_output =
+ genTensor(transposed_output_name, transposeShape<3, 2, 1, 0>(transposed_output_shape));
break;
+ }
case 2:
- transposed_shape = transposeShape<1, 0>(out_shape);
+ transposed_output_shape = out_shape;
+ transposed_input = input;
+ transposed_output = genTensor(tensorName(&op), transposeShape<1, 0>(transposed_output_shape));
break;
case 1:
- transposed_shape = out_shape;
+ transposed_output_shape = out_shape;
+ transposed_input = input;
+ transposed_output = genTensor(tensorName(&op), out_shape);
break;
default:
throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
}
- shared_ptr<ArtifactId> out = genTensor(op, transposed_shape);
// Prefix used for the name of variables related to the operation implementation.
- string operation_name = out->name() + "_bias_add_layer";
+ string layer_name = transposed_output->name() + "_bias_add_layer";
// Reshape the IR biases tensor and generate the corresponding DOM tensor.
const auto ir_input_shape = op.getInputShape(0);
ir_biases_shape.dim(2) = ir_biases.getShape().dim(0);
}
- auto biases = genTensor(operation_name + "_biases", ir_biases_shape);
+ auto biases = genTensor(layer_name + "_biases", ir_biases_shape);
// Instantiate the CLArithmeticAddition object.
- auto layer = genLayer("arm_compute::CLArithmeticAddition", operation_name,
- {AF::ref(in), AF::ref(biases), AF::ref(out),
+ auto layer = genLayer("arm_compute::CLArithmeticAddition", layer_name,
+ {AF::ref(transposed_input), AF::ref(biases), AF::ref(transposed_output),
AF::lit("arm_compute::ConvertPolicy::WRAP")});
allocate(biases);
// Save the IR biases tensor to later read this in the artifact.
serializeTensor(biases, ir_biases);
- allocate(out);
- runLayer(layer);
+ allocate(transposed_output);
+ genLayerExecution(layer);
+
+ if (out_shape.rank() == 4) {
+ // Generate output in NHWC format
+ shared_ptr<ArtifactId> output =
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ }
+
+ if (op.getNextNodes().empty())
+ _outputs.insert(&op);
}
void AclCppOpGenerator::visit(ops::VariableOp& op) {
shared_ptr<ArtifactId> tensor;
- if (op.getOutputShape(0).rank() == 2) {
- tensor = genTensor(op, transposeShape<1, 0>(op.getOutputShape(0)));
+ if (cli::debugTranspose) {
+ if (op.getOutputShape(0).rank() == 2)
+ tensor = genTensor(op, transposeShape<1, 0>(op.getOutputShape(0)));
+ else
+ tensor = genTensor(op, transposeShape<3, 2, 1, 0>(op.getOutputShape(0)));
} else {
- tensor = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
+ if (op.getOutputShape(0).rank() == 2)
+ tensor = genTensor(op, transposeShape<1, 0>(op.getOutputShape(0)));
+ else
+ tensor = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
}
allocate(tensor);
}
auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
{AF::ref(in), AF::ref(out)});
allocate(out);
- runLayer(layer);
+ genLayerExecution(layer);
}
void AclCppOpGenerator::visit(ops::ScaleOp& op) {
auto in_op = prev_nodes[0].op;
// Get input tensor identifier in the generated artifact.
- auto in = AF::id(tensorName(in_op));
+ auto input = AF::id(tensorName(in_op));
+
+ const string output_tensor_name = tensorName(&op);
+
+ // transpose input to NCHW format supported by ACL
+ const string transposed_input_name = output_tensor_name + "_transposed_input";
+ shared_ptr<ArtifactId> transposed_input =
+ genTransposeMIRtoACL(transposed_input_name, op.getInputShape(0), input);
// Create the output tensor in the DOM and obtain its identifier.
const Shape& out_shape = op.getOutputShape(0);
- Shape transposed_shape;
+ Shape transposed_output_shape;
switch (out_shape.rank()) {
case 4:
- transposed_shape = transposeShape<2, 1, 3, 0>(out_shape);
+ transposed_output_shape = transposeShape<0, 3, 1, 2>(out_shape);
break;
case 2:
- transposed_shape = transposeShape<1, 0>(out_shape);
+ transposed_output_shape = transposeShape<1, 0>(out_shape);
break;
case 1:
- transposed_shape = out_shape;
+ transposed_output_shape = out_shape;
break;
default:
throw AclCppException("Unsupported number of dimensions: " + to_string(out_shape.rank()));
}
- shared_ptr<ArtifactId> out = genTensor(op, transposed_shape);
- auto operation_name = out->name() + "_scale_layer";
+ const string transposed_output_name = output_tensor_name + "_transposed_output";
+ shared_ptr<ArtifactId> transposed_output =
+ genTensor(transposed_output_name, transposeShape<3, 2, 1, 0>(transposed_output_shape));
+
+ auto operation_name = transposed_output->name() + "_scale_layer";
const auto& ir_scales = op.getWeights();
// Reshape the IR scales tensor and generate the corresponding DOM tensor.
- const auto ir_input_shape = transposeShape<2, 1, 3, 0>(op.getInputShape(0));
+ const Shape ir_input_shape = transposeShape<2, 1, 3, 0>(op.getInputShape(0));
Shape ir_scales_shape(ir_input_shape.rank());
// ACL CLArithmeticDivision supports input tensors broadcasting.
auto layer1 = genLayer("arm_compute::CLArithmeticDivision",
operation_name + "_arithmetic_div_layer_1",
{AF::ref(unit), AF::ref(scales), AF::ref(tmp)});
- runLayer(layer1);
+ genLayerExecution(layer1);
// Create an instance of the CLArithmeticDivision class as a member of the artifact class.
auto layer2 = genLayer("arm_compute::CLArithmeticDivision",
operation_name + "_arithmetic_div_layer_2",
- {AF::ref(in), AF::ref(tmp), AF::ref(out)});
+ {AF::ref(transposed_input), AF::ref(tmp), AF::ref(transposed_output)});
allocate(scales);
// Save the IR scales tensor to later read this in the artifact.
serializeTensor(scales, ir_scales);
// Fill the unit tensor with the 1 value.
fillTensor(unit, "1");
allocate(tmp);
- allocate(out);
- runLayer(layer2);
+ allocate(transposed_output);
+ genLayerExecution(layer2);
+
+ // Generate output in NHWC format
+ shared_ptr<ArtifactId> output =
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+ if (op.getNextNodes().empty())
+ _outputs.insert(&op);
}
void AclCppOpGenerator::visit(mir::ops::SliceOp& op) {
- assert(false && "Unimplemented operation: SliceOp");
+  throw AclCppException("Unimplemented operation: SliceOp");
}
void AclCppOpGenerator::visit(ops::BatchNormOp& op) {
void AclCppOpGenerator::visit(ops::ElementwiseOp& op) {
// Create the output tensor in the DOM and obtain its identifier.
- auto out = genTensor(op, op.getOutputShape(0));
+ auto out = genTensor(op, transposeShape<3, 2, 1, 0>(op.getOutputShape(0)));
auto& prev_nodes = op.getPrevNodes();
assert(prev_nodes.size() >= 2);
assert(prev_nodes.size() == 1);
auto in_op = prev_nodes[0].op;
+ // get output tensor name that is used as base for other names
+ const string output_tensor_name = tensorName(&op);
+
// Get the identifier of the input tensor in the DOM.
- auto in = AF::id(tensorName(in_op));
+ auto input = AF::id(tensorName(in_op));
- // Create the output tensor in the DOM.
- auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
- string operation_name = out->name() + suffix;
+ // Generate auxiliary tensor to hold transposed input of convolution in NCHW format
+ shared_ptr<ArtifactId> transposed_input =
+ genTransposeMIRtoACL(output_tensor_name + "_transposed_input", op.getInputShape(0), input);
+
+ // Create the transposed output tensor in the DOM.
+ const string transposed_output_name = output_tensor_name + "_transposed_output";
+ Shape transposed_output_shape = transposeShape<0, 3, 1, 2>(op.getOutputShape(0));
+ shared_ptr<ArtifactId> transposed_output =
+ genTensor(transposed_output_name, transposeShape<3, 2, 1, 0>(transposed_output_shape));
+
+ string operation_name = output_tensor_name + suffix;
// Generate a tensor for weights (kernel) in the DOM.
auto weights = genTensor(operation_name + "_weights", ir_weights_shape);
// The parameter for the conv_layer.config(&in, &weights, nullptr, &out, pad_stride_info)
// function call.
- list<shared_ptr<ArtifactExpr>> config_params{AF::ref(in), AF::ref(weights), AF::lit("nullptr"),
- AF::ref(out), pad_stride_info};
+ list<shared_ptr<ArtifactExpr>> config_params{AF::ref(transposed_input),
+ AF::ref(weights), AF::lit("nullptr"),
+ AF::ref(transposed_output), pad_stride_info};
// Add to additional parameters for deconvolution.
if (op.getType() == Operation::Type::deConv2D) {
}
// Create the convolution (/depthwise convolution/deconvolution) layer class instance.
- auto layer = genLayer(acl_func_name, operation_name, config_params);
+ shared_ptr<ArtifactId> layer = genLayer(acl_func_name, operation_name, config_params);
allocate(weights);
+
// Save the IR weights tensor to later read this in the artifact.
serializeTensor(weights, ir_weights);
- allocate(out);
- runLayer(layer);
+ allocate(transposed_output);
+ genLayerExecution(layer);
+
+  // Generate auxiliary tensor to hold transposed output of convolution in NHWC format
+ shared_ptr<ArtifactId> output =
+ genTransposeACLtoMIR(output_tensor_name, transposed_output_shape, transposed_output);
+
+ if (op.getNextNodes().empty())
+ _outputs.insert(&op);
}
void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& activation_name,
auto in = AF::id(tensorName(in_op));
// Create the output tensor in the DOM and return its id.
- auto out = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
- auto prefix = out->name() + "_activation_layer";
+ shared_ptr<ArtifactId> output;
+ if (cli::debugTranspose)
+ output = genTensor(op, transposeShape<3, 2, 1, 0>(op.getOutputShape(0)));
+ else
+ output = genTensor(op, transposeShape<2, 1, 3, 0>(op.getOutputShape(0)));
+
+ auto prefix = output->name() + "_activation_layer";
// Create an instance of the ActivationLayerInfo class as a local variable in the artifact
// constructor. This instance profide information about the concrete activation function,
// Create an instance of the CLActivationLayer class as a member of the artifact class.
auto layer = genLayer("arm_compute::CLActivationLayer", prefix,
- {AF::ref(in), AF::ref(out), activation_info});
- allocate(out);
- runLayer(layer);
+ {AF::ref(in), AF::ref(output), activation_info});
+ allocate(output);
+ genLayerExecution(layer);
}
shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string& prefix, int index,
return tensor_name;
}
-std::shared_ptr<ArtifactId> AclCppOpGenerator::genShape(ArtifactBlock* block, const string& prefix,
- const Shape& shape) {
+template <typename T>
+std::shared_ptr<ArtifactId>
+AclCppOpGenerator::genVectorInitializedVar(ArtifactBlock* block, const string& type,
+                                           const string& name, const vector<T>& init) {
list<shared_ptr<ArtifactExpr>> dims;
- for (int i = 0; i < shape.rank(); ++i)
- dims.push_back(AF::lit(to_string(shape.dim(i))));
+  for (size_t i = 0; i < init.size(); ++i)
+ dims.push_back(AF::lit(to_string(init[i])));
- auto shape_var = block->var("arm_compute::TensorShape", prefix + "_shape", {}, dims);
+ auto shape_var = block->var(type, name, {}, dims);
auto shape_id = shape_var->use();
return shape_id;
}
-shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(const string& name, const Shape& ir_shape,
+shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(const string& name,
+ const Shape& ir_shape,
bool gen_accessor) {
auto id = AF::id(name);
if (_tensorNames.insert(name).second) {
_artifactClass->var(false, "arm_compute::CLTensor", name);
- auto shape = genShape(_constrBlock, name, ir_shape);
+ vector<int32_t> shape_vectorized;
+
+ // create vector of initializers from Shape
+ shape_vectorized.reserve(ir_shape.rank());
+ for (int i = 0; i < ir_shape.rank(); ++i)
+ shape_vectorized.push_back(ir_shape.dim(i));
+
+ const char* type_name = "arm_compute::TensorShape";
+ shared_ptr<ArtifactId> shape =
+ genVectorInitializedVar(_constrBlock, type_name, name + "_shape", shape_vectorized);
_constrBlock->call("initializeTensor", {id, shape});
if (gen_accessor) {
}
void AclCppOpGenerator::visit(ops::SqueezeOp& op) {
- assert(false && "Unimplemented operation: Squeeze");
+ throw AclCppException("Unimplemented operation: Squeeze");
}
void AclCppOpGenerator::visit(ops::SqrtOp& op) {
- assert(false && "Unimplemented operation: Sqrt");
+ throw AclCppException("Unimplemented operation: Sqrt");
}
void AclCppOpGenerator::allocate(std::shared_ptr<ArtifactId> tensor_id) {
_constrBlock->call("allocate", {}, AF::call("allocator", {}, a), ArtifactCallType::ref);
}
-shared_ptr<ArtifactId> AclCppOpGenerator::genLayer(
- const string& layer_type,
- const string& layer_name,
- const list<shared_ptr<ArtifactExpr>>& config_params) {
+shared_ptr<ArtifactId>
+AclCppOpGenerator::genLayer(const string& layer_type, const string& layer_name,
+ const list<shared_ptr<ArtifactExpr>>& config_params) {
auto layer_var = _artifactClass->var(false, layer_type, layer_name);
auto layer = layer_var->use();
_constrBlock->call("configure", config_params, layer);
return layer;
}
-void AclCppOpGenerator::runLayer(shared_ptr<ArtifactId> layer_id) {
+void AclCppOpGenerator::genLayerExecution(shared_ptr<ArtifactId> layer_id) {
_infBlock->call("run", {}, layer_id);
}
void AclCppOpGenerator::visit(mir::ops::ResizeOp& op) {
- assert(false && "Unimplemented operation: Resize");
+ throw AclCppException("Unimplemented operation: Resize");
}
void AclCppOpGenerator::visit(mir::ops::ReduceFOp& op) {
- assert(false && "Unimplemented operation: ReduceFOp");
+ throw AclCppException("Unimplemented operation: ReduceFOp");
+}
+
+/**
+ * @brief Emit a CLPermute layer that permutes `input` into `output`.
+ * @param input    DOM id of the source tensor
+ * @param output   DOM id of the destination tensor (allocated here)
+ * @param mir_perm permutation given in MIR axis order; translated below into
+ *                 the reversed-axis form ACL expects
+ * @throws AclCppException for permutations ACL 18.8 OpenCL does not support
+ */
+void AclCppOpGenerator::genTranspose(const std::shared_ptr<nnc::ArtifactId>& input,
+                                     const std::shared_ptr<nnc::ArtifactId>& output,
+                                     const std::vector<size_t>& mir_perm) {
+
+  // acl 18.8 opencl implementation supports only 3 types of permutation:
+  // in mir (0, 3, 1, 2), in acl (axes are in reverse order) (1, 2, 0)
+  // in mir (0, 2, 3, 1), in acl (2, 0, 1)
+  // in mir (2, 3, 1, 0), in acl (3, 2, 0, 1)
+  // so here we try to transform mir transpose into one acl supports
+
+  const string& out_name = output->name();
+  vector<size_t> acl_perm;
+
+  if (mir_perm == vector<size_t>{0, 3, 1, 2})
+    acl_perm = {1, 2, 0};
+  else if (mir_perm == vector<size_t>{0, 2, 3, 1})
+    acl_perm = {2, 0, 1};
+  else if (mir_perm == vector<size_t>{2, 3, 1, 0})
+    acl_perm = {3, 2, 0, 1};
+  else
+    throw AclCppException("Unsupported transpose sequence in operation " + out_name);
+
+  // Create operation parameter containing permutation vector
+  shared_ptr<ArtifactId> perm_vector =
+    genVectorInitializedVar(_constrBlock, "arm_compute::PermutationVector",
+                            out_name + "_perm_param", acl_perm);
+
+  // Instantiate the CLPermute object.
+  string layer_name = out_name + "_transpose_layer";
+  list<shared_ptr<ArtifactExpr>> arguments = {AF::ref(input), AF::ref(output), perm_vector};
+  auto layer = genLayer("arm_compute::CLPermute", layer_name, arguments);
+  allocate(output);
+  genLayerExecution(layer);
}
void AclCppOpGenerator::visit(mir::ops::TransposeOp& op) {
- assert(false && "Unimplemented operation: TransposeOp");
+ auto& prev_nodes = op.getPrevNodes();
+ assert(prev_nodes.size() == 1);
+ auto in_op = prev_nodes[0].op;
+
+ // Get the input node tensor id in the DOM.
+ shared_ptr<ArtifactId> input = AF::id(tensorName(in_op));
+ const vector<size_t>& mir_axis_order = op.getAxisOrder();
+
+ // Create the output tensor in the DOM.
+ if (op.getOutputShape(0).rank() != 4)
+ throw AclCppException("Unsupported number of dimensions in transpose operation");
+ // TODO replace transpose shape
+ shared_ptr<ArtifactId> output = genTensor(op, transposeShape<3, 2, 1, 0>(op.getOutputShape(0)));
+
+ // Actual generation of operation and related stuff
+ genTranspose(input, output, mir_axis_order);
}
void AclCppOpGenerator::visit(mir::ops::GatherOp& op) {
- assert(false && "Unimplemented operation: GatherOp");
+ throw AclCppException("Unimplemented operation: GatherOp");
}
void AclCppOpGenerator::visit(ops::SigmoidOp& op) {