using namespace mir;
// Construct the generator. `name` becomes the artifact module/class name;
// `par_out` is the stream the serialized tensor parameters are written to.
// (Resolved unresolved diff markers: the '-' and '+' initializer lines were identical.)
AclCppOpGenerator::AclCppOpGenerator(const string& name, ostream& par_out)
    : _parOut(par_out), _module(name), _clScheduler(AF::id("arm_compute::CLScheduler")) {}
// Generate the whole artifact for the given IR graph: headers, the artifact class
// skeleton, the parameter-file input stream, then a visitor pass over the graph,
// followed by the deferred allocation/deserialization/filling calls.
// Returns the completed artifact module.
const ArtifactModule& AclCppOpGenerator::generate(mir::Graph* g) {
  // Including headers.
  _module.addHeaderSysInclude("fstream");
  _module.addHeaderInclude("arm_compute/core/Types.h");
  _module.addHeaderInclude("arm_compute/runtime/CL/CLFunctions.h");
  _module.addHeaderInclude("arm_compute/runtime/CL/CLScheduler.h");
  _module.addHeaderInclude("arm_compute/runtime/CL/CLBufferAllocator.h");
  _module.addHeaderInclude("arm_compute/runtime/BlobLifetimeManager.h");
  _module.addHeaderInclude("arm_compute/runtime/PoolManager.h");
  _module.addHeaderInclude("arm_compute/runtime/MemoryManagerOnDemand.h");

  // The general structure creation.
  _artifactClass = _module.createClass(_module.name());
  _constrBlock = _artifactClass->getConstrBlock();
  _inferenceFunction = _artifactClass->func(true, "void", "Inference");
  _infBlock = _inferenceFunction->getBlock();

  // Input parameter stream preparation.
  _parInVar = _artifactClass->var(false, "std::ifstream", "_parIn");
  _parIn = _parInVar->use();
  string par_file_name = cli::artifactName + ".par";
  // NOTE(review): the declaration of `file_fail_block` (the error branch of the
  // stream-open check) is elided in this view — confirm against the full file.
  file_fail_block->addStatement(AF::lit("throw std::string(\"Failed to open file: " +
                                        par_file_name + " for reading\")"));

  // Traverse the computational graph.
  g->accept(this);

  // Generate all the deferred entities.
  genNamed();
  genAllocates();
  genDeserializations();
  genFillings();

  // Make sure all the OpenCL jobs are done executing:
  _infBlock->call("sync", {}, AF::call("get", {}, _clScheduler, ArtifactCallType::scope));

  // The function returns a reference; the visible code was missing this return.
  return _module;
}
// Generate a CLConcatenateLayer for an IR Concat operation.
void AclCppOpGenerator::visit(ops::ConcatOp& op) {
  // Axis names follow the ACL data-layout dimension order used by this generator.
  static const char* axis_names[] = {"arm_compute::DataLayoutDimension::HEIGHT",
                                     "arm_compute::DataLayoutDimension::WIDTH",
                                     "arm_compute::DataLayoutDimension::CHANNEL"};

  // Normalize a possibly negative axis to a non-negative index.
  int axis = op.getAxis() < 0 ? op.getOutputShape(0).rank() + op.getAxis() : op.getAxis();
  // Cast avoids the signed/unsigned comparison; also reject still-negative axes.
  assert(axis >= 0 && static_cast<size_t>(axis) < sizeof(axis_names) / sizeof(axis_names[0]));

  // Create the output tensor in the DOM (shape transposed to the ACL layout).
  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
  auto prefix = out->name() + "_concatenate_layer";

  // Collect pointers to all the input tensors into a vector in the artifact constructor.
  auto inputs_var = _constrBlock->var("std::vector<arm_compute::ICLTensor*>", prefix + "_inputs");
  auto inputs = inputs_var->use();

  for (auto i : op.getPrevNodes())
    _constrBlock->call("push_back", {AF::ref(AF::id(tensorName(i.op)))}, inputs);

  auto layer = genLayer("arm_compute::CLConcatenateLayer", prefix,
                        {inputs, AF::ref(out), AF::lit(axis_names[axis])});

  allocate(out);
  runLayer(layer);
}
// NOTE(review): the body of this visitor is elided in this view; presumably it
// delegates to genConvolution() like the depthwise variant does — confirm
// against the full file before editing.
void AclCppOpGenerator::visit(ops::Conv2DOp& op) {
}
// Depthwise convolution: delegate to the shared convolution generator with the
// ACL depthwise layer class name and a name suffix for generated entities.
void AclCppOpGenerator::visit(ops::DepthwiseConv2DOp& op) {
  genConvolution(op, "arm_compute::CLDepthwiseConvolutionLayer", "_depthwise_convolution_layer");
}
// Generate a CLSoftmaxLayer for an IR Softmax operation. When the softmax axis is
// not axis 0, the input is reshaped so the softmax dimension comes first, softmax
// is applied, and the result is reshaped back.
void AclCppOpGenerator::visit(ops::SoftmaxOp& op) {
  // NOTE(review): the declaration of `in_ops` (previous-node list) is elided in this view.
  assert(in_ops.size() == 1);
  auto in_op = in_ops[0].op;
  auto in = AF::id(tensorName(in_op));

  int rank = op.getOutputShape(0).rank();
  // CLPermute does not support all kinds of permutations now.
  // rank can be more than 2 in our models, so we can not use CLTranspose.
  // This means we can support tensors with no more than one axis > 1.
  int axis = op.getAxis() < 0 ? rank + op.getAxis() : op.getAxis();
  assert(axis == rank - 1);
  int nof_long_axes = 0;

  for (int i = 0; i < rank; ++i) {
    if (op.getOutputShape(0).dim(i) > 1)
      ++nof_long_axes;
  }

  // TODO: Consider how to support Softmax on more general inputs.
  if (nof_long_axes > 1)
    throw AclCppException("Unsupported Softmax operation with several dimensions greater than 1");

  // Create the output tensor.
  Shape in_out_shape(op.getOutputShape(0));
  Shape sm_shape(in_out_shape);

  // Swap the softmax axis with dimension 0 so softmax runs over the first dimension.
  if (axis != 0) {
    int sm_dim = sm_shape.dim(axis);
    sm_shape.dim(axis) = sm_shape.dim(0);
    sm_shape.dim(0) = sm_dim;
  }

  auto out = genTensor(op, in_out_shape);
  auto prefix = out->name();

  if (axis == 0) {
    // Simple version: do not need pre and post reshapes.
    // Apply the softmax operation.
    auto sm = genLayer("arm_compute::CLSoftmaxLayer", prefix + "_softmax_layer",
                       {AF::ref(in), AF::ref(out)});
    allocate(out);
    runLayer(sm);
  } else {
    // Need to reshape before the Softmax application and after it.
    // Then we need two tensors for intermediate results. This is because we do a couple of
    // auxiliary reshapes: one to transform the input tensor to a unidimensional tensor and the
    // second to transform the result of the softmax operation back to the original form.
    auto tmp = genTensor(prefix + "_tmp", sm_shape);
    auto tmp2 = genTensor(prefix + "_tmp2", sm_shape);

    // Do the input permutation.
    auto transp1 = genLayer("arm_compute::CLReshapeLayer", prefix + "_transp_layer1",
                            {AF::ref(in), AF::ref(tmp)});
    allocate(tmp);
    runLayer(transp1);

    // Apply the softmax operation.
    auto sm = genLayer("arm_compute::CLSoftmaxLayer", prefix + "_softmax_layer",
                       {AF::ref(tmp), AF::ref(tmp2)});
    allocate(tmp2);
    runLayer(sm);

    // Reshape the output to the original form.
    auto transp2 = genLayer("arm_compute::CLReshapeLayer", prefix + "_transp_layer2",
                            {AF::ref(tmp2), AF::ref(out)});
    allocate(out);
    runLayer(transp2);
  }
}
// Generate a CLPoolingLayer for an IR Pool operation.
void AclCppOpGenerator::visit(ops::PoolOp& op) {
  // NOTE(review): declarations of `prev_nodes` and `pooling_type` are elided in this view.
  auto in_op = prev_nodes[0].op;
  auto in = AF::id(tensorName(in_op));

  // Create the output tensor (shape transposed to the ACL layout).
  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
  auto prefix = out->name() + "_pooling_layer";

  // IR strides/paddings are in (H, W) order; ACL PadStrideInfo expects (W, H), hence the swap.
  auto pad_stride_info_var = _constrBlock->var("arm_compute::PadStrideInfo",
                                               prefix + "_pad_stride_info",
                                               {}, {AF::lit(to_string(op.getStrides().dim(1))),
                                                    AF::lit(to_string(op.getStrides().dim(0))),
                                                    AF::lit(to_string(op.getPadding(1))),
                                                    AF::lit(to_string(op.getPadding(0)))});
  auto pad_stride_info = pad_stride_info_var->use();

  // Size2D is (width, height) — again swapped relative to the IR window shape.
  auto kernel_window_var = _constrBlock->var("arm_compute::Size2D", prefix + "_kernel_window", {},
                                             {AF::lit(to_string(op.getWindowShape().dim(1))),
                                              AF::lit(to_string(op.getWindowShape().dim(0)))});
  auto kernel_window = kernel_window_var->use();

  // The last argument maps the IR EMPTY border type to PoolingLayerInfo's
  // exclude_padding flag ("true" when the border is EMPTY).
  auto pooling_info_var = _constrBlock->var(
      "arm_compute::PoolingLayerInfo", prefix + "_pooling_info", {},
      {AF::lit(pooling_type), kernel_window, pad_stride_info,
       AF::lit(op.getBorderType() == ops::PoolOp::BorderType::EMPTY ? "true" : "false")});
  auto pooling_info = pooling_info_var->use();

  auto layer = genLayer("arm_compute::CLPoolingLayer", prefix,
                        {AF::ref(in), AF::ref(out), pooling_info});
  allocate(out);
  runLayer(layer);
}
// Generate a CLFullyConnectedLayer for an IR FullyConnected operation.
void AclCppOpGenerator::visit(ops::FullyConnectedOp& op) {
  // NOTE(review): declarations of `in_op`, `ir_weights` and `ir_weights_shape`
  // are elided in this view — confirm against the full file.
  auto in = AF::id(tensorName(in_op));

  // Create the output tensor in the DOM.
  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
  string operation_name = out->name() + "_fully_connected_layer";

  // Create the weights tensor in the DOM and use its id.
  auto weights = genTensor(operation_name + "_weights", ir_weights_shape);

  // Instantiate the CLFullyConnectedLayer object.
  auto layer = genLayer("arm_compute::CLFullyConnectedLayer", operation_name,
                        {AF::ref(in), AF::ref(weights), AF::lit("nullptr"), AF::ref(out)});
  // Allocation must precede deserialization of the weights contents.
  allocate(weights);
  // Serialize the weights tensor and generate the function to deserialize it in the artifact.
  serializeTensor(weights, ir_weights);
  allocate(out);
  runLayer(layer);
}
// NOTE(review): this view is elided; the body below implements a bias addition
// (CLArithmeticAddition with a biases tensor) and likely belongs to a BiasAddOp
// visitor whose signature was cut out — confirm against the full file.
void AclCppOpGenerator::visit(ops::CappedReluOp& op) {
  auto in = AF::id(tensorName(in_op));

  // Create the output tensor in the DOM and obtain its identifier.
  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));

  // Prefix used for the name of variables related to the operation implementation.
  string operation_name = out->name() + "_bias_add_layer";

  // The biases are laid out along the last dimension of the input.
  ir_biases_shape.dim(-1) = ir_biases.getShape().dim(0);
  auto biases = genTensor(operation_name + "_biases", ir_biases_shape);

  // Instantiate the CLArithmeticAddition object.
  auto layer = genLayer("arm_compute::CLArithmeticAddition", operation_name,
                        {AF::ref(in), AF::ref(biases), AF::ref(out),
                         AF::lit("arm_compute::ConvertPolicy::WRAP")});
  allocate(biases);
  // Save the IR biases tensor to later read this in the artifact.
  serializeTensor(biases, ir_biases);
  allocate(out);
  runLayer(layer);
}
// Generate the DOM tensor for a graph variable (input) and schedule its allocation.
// The shape is transposed to the ACL layout, consistent with the other visitors.
void AclCppOpGenerator::visit(ops::VariableOp& op) {
  auto tensor = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
  allocate(tensor);
}
// NOTE(review): this view is elided; the body below generates a CLReshapeLayer
// and likely belongs to a ReshapeOp visitor whose signature was cut out —
// confirm against the full file.
void AclCppOpGenerator::visit(ops::ReluOp& op) {
  auto in = AF::id(tensorName(in_op));

  // Create the output tensor in the DOM and return its id.
  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));

  // Create an instance of the CLReshapeLayer class as a member of the artifact class.
  auto layer = genLayer("arm_compute::CLReshapeLayer", out->name() + "_reshape_layer",
                        {AF::ref(in), AF::ref(out)});
  allocate(out);
  runLayer(layer);
}
// Generate a per-channel scale: out = in * scales, implemented with two
// CLArithmeticDivision layers (tmp = 1 / scales, out = in / tmp) because
// CLArithmeticDivision supports broadcasting of the scales tensor.
void AclCppOpGenerator::visit(ops::ScaleOp& op) {
  // NOTE(review): the declaration of `in_op` is elided in this view.
  auto in = AF::id(tensorName(in_op));

  // Generate output tensor description in the DOM.
  auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
  auto operation_name = out->name() + "_scale_layer";

  const auto& ir_scales = op.getWeights();

  // Reshape the IR scales tensor and generate the corresponding DOM tensor:
  // all dimensions are 1 except the last, which holds the scale values.
  Shape ir_scales_shape;
  const auto ir_input_shape = transposeShape<1, 0, 2>(op.getInputShape(0));
  ir_scales_shape.resize(ir_input_shape.rank());

  // ACL CLArithmeticDivision supports input tensors broadcasting.
  for (int i = 0; i < ir_input_shape.rank() - 1; ++i)
    ir_scales_shape.dim(i) = 1;

  ir_scales_shape.dim(-1) = ir_scales.getShape().dim(0);
  auto scales = genTensor(operation_name + "_scales", ir_scales_shape);

  // We do not use the genMultiplication() function here because the input needs broadcasting.

  // Create a unit tensor in the DOM.
  auto unit = genTensor(operation_name + "_unit", ir_input_shape);

  // Create a tmp tensor in the DOM to store the result of 1 / scale.
  auto tmp = genTensor(operation_name + "_tmp", ir_input_shape);

  // First division: tmp = unit / scales == 1 / scales.
  auto layer1 = genLayer("arm_compute::CLArithmeticDivision",
                         operation_name + "_arithmetic_div_layer_1",
                         {AF::ref(unit), AF::ref(scales), AF::ref(tmp)});
  runLayer(layer1);

  // Second division: out = in / tmp == in * scales.
  auto layer2 = genLayer("arm_compute::CLArithmeticDivision",
                         operation_name + "_arithmetic_div_layer_2",
                         {AF::ref(in), AF::ref(tmp), AF::ref(out)});
  allocate(scales);
  // Save the IR scales tensor to later read this in the artifact.
  serializeTensor(scales, ir_scales);
  allocate(unit);
  // Fill the unit tensor with the 1 value.
  fillTensor(unit, "1");
  allocate(tmp);
  allocate(out);
  runLayer(layer2);
}
// NOTE(review): this view is heavily elided; the body below (weights, PadStrideInfo,
// configurable layer class via `acl_func_name`) matches the shared convolution
// generator (genConvolution), not a BatchNorm visitor — confirm against the full file.
void AclCppOpGenerator::visit(ops::BatchNormOp& op) {
  const Shape& ir_weights_shape = ir_weights->getShape();
  assert(ir_weights_shape.rank() == 4);
  Shape ir_biases_shape({ir_weights_shape.dim(-1)});

  // IR strides are in (H, W) order; ACL expects (W, H), hence the transpose/swap below.
  const Shape& strides = transposeShape<1, 0>(op.getStrides());
  assert(strides.rank() == 3 && strides.dim(2) == 1);
  uint32_t pad_x = op.getPadding(1);
  uint32_t pad_y = op.getPadding(0);
  assert(op.getPadding(2) == 0);

  auto& prev_nodes = op.getPrevNodes();

  // Generate a tensor for weights (kernel) in the DOM.
  auto weights = genTensor(operation_name + "_weights", ir_weights_shape);

  // Create a local variable of type PadStrideInfo in the artifact constructor:
  // PadStrideInfo pad_stride_info(stride_x, stride_y, pad_x, pad_y);
  auto pad_stride_info_var = _constrBlock->var("arm_compute::PadStrideInfo",
                                               operation_name + "_pad_stride_info",
                                               {}, {AF::lit(to_string(strides.dim(0))),
                                                    AF::lit(to_string(strides.dim(1))),
                                                    AF::lit(to_string(pad_x)),
                                                    AF::lit(to_string(pad_y))});
  auto pad_stride_info = pad_stride_info_var->use();

  // The parameter for the conv_layer.configure(&in, &weights, nullptr, &out, pad_stride_info)
  // function call.
  // NOTE(review): the remainder of this initializer and the surrounding branch are elided.
  list<shared_ptr<ArtifactExpr>> config_params{AF::ref(in), AF::ref(weights), AF::lit("nullptr"),
    config_params.push_back(AF::lit("0"));
  }

  // Create the convolution (/depthwise convolution/deconvolution) layer class instance.
  auto layer = genLayer(acl_func_name, operation_name, config_params);
  allocate(weights);
  // Save the IR weights tensor to later read this in the artifact.
  serializeTensor(weights, *ir_weights);
  allocate(out);
  runLayer(layer);
}
-void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& activation_name, float a, float b) {
+void AclCppOpGenerator::genActivation(mir::Operation& op, const std::string& activation_name,
+ float a, float b) {
auto &prev_nodes = op.getPrevNodes();
assert(prev_nodes.size() == 1);
auto in = AF::id(tensorName(in_op));
// Create the output tensor in the DOM and return its id.
- auto out = genTensor(op, op.getOutputShape(0));
+ auto out = genTensor(op, transposeShape<1, 0, 2>(op.getOutputShape(0)));
auto prefix = out->name() + "_activation_layer";
// Create an instance of the ActivationLayerInfo class as a local variable in the artifact
auto activation_info = activation_info_var->use();
// Create an instance of the CLActivationLayer class as a member of the artifact class.
- auto activation_layer_var = _artifactClass->var(false, "arm_compute::CLActivationLayer",
- prefix);
- auto activation_layer = activation_layer_var->use();
-
- // Generate the call: activation_layer.configure(&in, &out, activation_info);
- _constrBlock->call("configure", {AF::ref(in), AF::ref(out), activation_info}, activation_layer);
-
- // Generate the call: activation_layer.run();
- _infBlock->call("run", {}, activation_layer);
+ auto layer = genLayer("arm_compute::CLActivationLayer", prefix,
+ {AF::ref(in), AF::ref(out), activation_info});
+ allocate(out);
+ runLayer(layer);
}
shared_ptr<ArtifactId> AclCppOpGenerator::genAddition(const string& prefix, int index,
auto unit = genTensor(operation_name + "_unit", ir_unit_shape);
// Fill the unit tensor with the 1 value.
- _constrBlock->call("fillTensor", {unit, AF::lit("1")});
+ fillTensor(unit, "1");
// Create a tmp tensor in the DOM to store the result of 1 / in2.
auto tmp = genTensor(operation_name + "_tmp", ir_shape);
- // Create an instance of the CLActivationLayer class as a member of the artifact class.
+ // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
auto arithmetic_div_layer_var1 = _artifactClass->var(false, "arm_compute::CLArithmeticDivision",
- operation_name + "_arithmetic_add_layer_1");
+ operation_name + "_arithmetic_div_layer_1");
auto arithmetic_div_layer1 = arithmetic_div_layer_var1->use();
// Generate the call: arithmetic_div_layer1.configure(&unit, &in2, &tmp);
// Generate the call: arithmetic_div_layer1.run();
_infBlock->call("run", {}, arithmetic_div_layer1);
- // Create an instance of the CLActivationLayer class as a member of the artifact class.
+ // Create an instance of the CLArithmeticDivision class as a member of the artifact class.
auto arithmetic_div_layer_var2 = _artifactClass->var(false, "arm_compute::CLArithmeticDivision",
- operation_name + "_arithmetic_add_layer_2");
+ operation_name + "_arithmetic_div_layer_2");
auto arithmetic_div_layer2 = arithmetic_div_layer_var2->use();
// Generate the call: arithmetic_div_layer2.configure(&in1, &tmp, &out);
_constrBlock->call("configure", {AF::ref(in1), AF::ref(tmp), AF::ref(out)},
- arithmetic_div_layer1);
+ arithmetic_div_layer2);
// Generate the call: arithmetic_div_layer2.run();
_infBlock->call("run", {}, arithmetic_div_layer2);
return out;
}
-string AclCppOpGenerator::tensorName(Operation* op) const {
+string AclCppOpGenerator::tensorName(const Operation* op) const {
string tensor_name;
if (!op->getName().empty()) {
return id;
}
-std::shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(mir::Operation& op, const Shape& ir_shape) {
+shared_ptr<ArtifactId> AclCppOpGenerator::genTensor(Operation& op, const Shape& ir_shape) {
if (op.getPrevNodes().empty())
_inputs.insert(&op);
}
}
-void AclCppOpGenerator::serializeTensor(const TensorVariant& tensor) {
+void AclCppOpGenerator::serializeTensor(shared_ptr<ArtifactId> tensor_id,
+ const TensorVariant& ir_tensor) {
+ serializeIRTensor(ir_tensor);
+ _serializations.push_back(tensor_id);
+}
+
+void AclCppOpGenerator::serializeIRTensor(const TensorVariant& tensor) {
const Shape& shape = tensor.getShape();
Index coords;
coords.resize(shape.rank());
}
for (;;) {
+ float v;
+ memcpy(&v, tensor.at(coords), tensor.getElementSize());
_parOut.write(tensor.at(coords), tensor.getElementSize());
bool stop = true;
int i;
}
}
+void AclCppOpGenerator::genDeserializations() {
+ for (auto s : _serializations)
+ _constrBlock->call("deserializeTensor", {_parIn, s});
+}
+
+void AclCppOpGenerator::genFillings() {
+ for (auto f : _fillings)
+ _constrBlock->call("fillTensor", {f.first, AF::lit(f.second)});
+}
+
+void AclCppOpGenerator::fillTensor(shared_ptr<ArtifactId> tensor_id, const string& val) {
+ _fillings.push_back(make_pair(tensor_id, val));
+}
+
// Squeeze is not supported by this backend yet.
// NOTE(review): assert(false) compiles out under NDEBUG, so in release builds the
// operation is silently skipped — consider throwing AclCppException instead.
void AclCppOpGenerator::visit(ops::SqueezeOp& op) {
  assert(false && "Unimplemented operation: Squeeze");
}
+void AclCppOpGenerator::allocate(std::shared_ptr<ArtifactId> tensor_id) {
+ _allocates.push_back(tensor_id);
+}
+
+void AclCppOpGenerator::genAllocates() {
+ for (auto a : _allocates)
+ _constrBlock->call("allocate", {}, AF::call("allocator", {}, a), ArtifactCallType::ref);
+}
+
+shared_ptr<ArtifactId> AclCppOpGenerator::genLayer(
+ const string& layer_type,
+ const string& layer_name,
+ const list<shared_ptr<ArtifactExpr>>& config_params) {
+ auto layer_var = _artifactClass->var(false, layer_type, layer_name);
+ auto layer = layer_var->use();
+ _constrBlock->call("configure", config_params, layer);
+ return layer;
+}
+
+void AclCppOpGenerator::runLayer(shared_ptr<ArtifactId> layer_id) {
+ _infBlock->call("run", {}, layer_id);
+}
+
// Resize is not supported by this backend yet.
// NOTE(review): assert(false) compiles out under NDEBUG, so in release builds the
// operation is silently skipped — consider throwing AclCppException instead.
void AclCppOpGenerator::visit(mir::ops::ResizeOp& op) {
  assert(false && "Unimplemented operation: Resize");
}