void CPPCodeGenerator::materializeInferenceSequence(ostream &out, const ModelAnalyzer &ma)
{
using OpDescr = OpDescr;
+ // Allocate the temporary (im2col) tensor, sized to the largest buffer any operation needs
+ out << " Tensor " << _formattedTensors[ma.getTempTID()] <<
+ "(Shape{" << ma.getMaxTemporarySize() << "});\n";
for (const OpDescr &op: ma.getInferenceSequence())
{
if (op._op->getType() == mir::Operation::Type::variable)
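
For reference, with a maximum temporary size of 1806336 elements and the temporary tensor formatted as "Temporary" (both values are illustrative, not taken from this change), the generator above would emit roughly:

    // illustrative generator output -- the real name comes from _formattedTensors
    Tensor Temporary(Shape{1806336});
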
using namespace nnc::mir;
-void ModelAnalyzer::addOpDescr(Operation* op, const string& function_name) {
+void ModelAnalyzer::addOpDescr(
+ Operation* op, const string& function_name, std::vector<size_t> aux_args) {
vector<size_t> node_output_tensors;
const string &op_name = op->getName();
node_input_tensors.push_back(inTid);
}
+ // this op uses temporary memory (e.g. im2col)
+ if (!aux_args.empty()) {
+ std::copy(aux_args.begin(), aux_args.end(), std::back_inserter(node_input_tensors));
+ }
+
_inferenceSequence.push_back({op, function_name,
std::move(node_input_tensors),
std::move(node_output_tensors),
_opToDescr[op] = &_inferenceSequence.back();
}
+void ModelAnalyzer::updateMaxTemporarySize(const size_t size) {
+ _max_temp_size = std::max(_max_temp_size, size);
+}
+
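
declareTemporaryTensor(), used a few lines below, is not part of this excerpt; a minimal sketch consistent with declareInputTensor (assuming temporaries need no name or shape at declaration time) might look like:

    // sketch only -- the real declareTemporaryTensor() is not shown in this change
    size_t ModelAnalyzer::declareTemporaryTensor() {
      size_t id = _allocatedTensors++;  // temporaries share the common tensor id space
      return id;
    }
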
size_t ModelAnalyzer::declareInputTensor(const std::string& name, const mir::Shape& shape) {
assert(!name.empty() && "Input tensor must have name");
size_t id = _allocatedTensors++;
auto constants = g->collectConstants();
init_ops.insert(init_ops.end(), constants.begin(), constants.end());
+ // Register temporary tensor for im2col buffer
+ _temp_tensor_id = declareTemporaryTensor();
+
// Walk all network inputs
for (Operation* in : init_ops) {
assert(dynamic_cast<ops::VariableOp*>(in) || dynamic_cast<ops::ConstantOp*>(in));
}
void ModelAnalyzer::visit(ops::Conv2DOp& op) {
- addOpDescr(&op, "conv2d");
+ // im2col needs (kernel_h * kernel_w * in_channels) elements for every output position
+ const auto& kernel_shape = op.getKernel().getShape();
+ const auto& out_shape = op.getOutputShape(0);
+ const int32_t tmp_size = kernel_shape.dim(0) * kernel_shape.dim(1) * kernel_shape.dim(2)
+ * out_shape.dim(0) * out_shape.dim(1) * out_shape.dim(2);
+ updateMaxTemporarySize(static_cast<size_t>(tmp_size));
+ addOpDescr(&op, "conv2d", {_temp_tensor_id});
}
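
As a concrete check of this sizing: a 3x3 kernel over 16 input channels with a 1x112x112 output feature map (illustrative numbers) needs 3 * 3 * 16 * 112 * 112 = 1,806,336 floats, about 6.9 MB; only the largest such buffer across the whole model is ever materialized by the generator.
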
void ModelAnalyzer::visit(ops::DepthwiseConv2DOp& op) {
}
void ModelAnalyzer::visit(mir::ops::DeConv2DOp& op) {
- addOpDescr(&op, "convTransposed2d");
+ // im2col buffer for the transposed convolution, computed from the deconvolution kernel layout
+ const auto& kernel_shape = op.getKernel().getShape();
+ const auto& out_shape = op.getOutputShape(0);
+ const int32_t tmp_size = kernel_shape.dim(0) * kernel_shape.dim(1) * kernel_shape.dim(3) *
+ out_shape.dim(0) * out_shape.dim(1) * out_shape.dim(2);
+ updateMaxTemporarySize(static_cast<size_t>(tmp_size));
+ addOpDescr(&op, "convTransposed2d", {_temp_tensor_id});
}
void ModelAnalyzer::visit(ops::SqueezeOp& op) {
#include <string>
#include <cassert>
#include <limits>
+#include <algorithm>
+#include <iterator>
namespace nnc {
// list of output tensors
std::vector<size_t> _outputs;
size_t _paramStartOffset;
+ // list of temporary tensors used by the operation (e.g. an im2col buffer)
+ std::list<size_t> _temporaries;
};
/**
return _modelName;
}
+ /// @brief Returns the size of the largest temporary (im2col) buffer needed by any operation
+ size_t getMaxTemporarySize() const {
+ return _max_temp_size;
+ }
+
+ /// @brief Returns the id of the shared temporary tensor
+ size_t getTempTID() const {
+ return _temp_tensor_id;
+ }
+
private:
/**
* @brief Common function to add function call in inference sequence
* @param op Node representing added call
* @param function_name Function name
+ * @param aux_args Auxiliary argument (e.g. temporary tensor) ids
*
* Inserts information about CG operation into inference sequence: name of operation,
* creates tensors for operation outputs, binds operation inputs with tensors from previous operations
*/
- void addOpDescr(mir::Operation* op, const std::string& function_name);
+ void addOpDescr(mir::Operation* op,
+ const std::string& function_name, std::vector<size_t> aux_args = {});
+
+ /**
+ * @brief Updates the maximum size of the temporary (im2col) buffer needed by any operation
+ * @param size Buffer size required by the operation currently being visited
+ */
+ void updateMaxTemporarySize(const size_t size);
/**
* @brief Declares input tensor in artifact
std::string _modelName = "NN";
std::list<OpDescr> _inferenceSequence;
size_t _allocatedTensors = 0;
-
/// @brief list of artifact inputs
std::vector<size_t> _inputs;
/// @brief list of persistent tensors
std::vector<size_t> _persistent_tensors;
/// @brief list of tensor ids corresponding to NN outputs
std::vector<size_t> _outputs;
+ /// @brief size of the largest temporary (im2col) buffer required by a single operation
+ size_t _max_temp_size = 0;
+ /// @brief id of the tensor backing the shared temporary buffer
+ size_t _temp_tensor_id = 0;
std::vector<TensorDescription> _tensors;
std::map<const mir::Operation*, OpDescr*> _opToDescr;
};
out.getData(), shapeToDims(out.getShape()));
}
-void conv2d(Tensor &out, const char *params, const Tensor &in)
-{
+void conv2d(Tensor& out, const char* params, const Tensor& in, Tensor& temporary) {
const float *input = in.getData();
Dims<4> input_d = shapeToDims(in.getShape());
Kernel kernel = deserializeKernel(params);
const int pad_w = pads[1];
const int pad_h = pads[0];
- unique_ptr<float, void(*)(float *)> im2col(nullptr, [](float *d){delete [] d;});
+ // im2col is only needed for non-1x1 kernels or non-unit strides; when it is, the buffer
+ // comes from the preallocated shared temporary tensor instead of a per-call heap allocation
+ float* im2col_data = nullptr;
if (stride_w != 1 || stride_h != 1 || kernel.dims.sizes[1] != 1 || kernel.dims.sizes[2] != 1)
{
- im2col.reset(new float[volume(im2col_d)]);
+ im2col_data = temporary.getData();
}
Conv(input, input_d,
stride_w, stride_h,
pad_w, pad_h,
out.getData(), out_d,
- im2col.get(), im2col_d);
+ im2col_data, im2col_d);
}
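
Because the generated inference sequence runs operations one at a time, a single shared buffer can back every im2col use. Generated code would call the new signature roughly as follows (tensor names and parameter offsets are purely illustrative):

    // illustrative generated calls -- one temporary is reused across layers
    Tensor Temporary(Shape{1806336});
    conv2d(conv1_out, params + conv1_param_offset, model_input, Temporary);
    conv2d(conv2_out, params + conv2_param_offset, conv1_out, Temporary);
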
-void convTransposed2d(Tensor &out, const char *params, const Tensor &in) {
+void convTransposed2d(Tensor& out, const char* params, const Tensor& in, Tensor& temporary) {
const float *input = in.getData();
RuntimeShape input_shape = shapeToRuntimeShape(in.getShape());
KernelRT kernel = deserializeKernelRT(params);
const auto convPara = ConvParams({PaddingType::kSame,
PaddingValues({pad_w,pad_h}), stride_w, stride_h});
- unique_ptr<float, void(*)(float *)> im2col(nullptr, [](float *d){delete [] d;});
- if (stride_w != 1 || stride_h != 1 || kernel.shape.Dims(1) != 1 || kernel.shape.Dims(2) != 1) {
- im2col.reset(new float[im2col_shape.FlatSize()]);
- }
-
TransposeConv(
convPara, input_shape, input, kernel.shape, kernel.data,
- out_shape, out.getData(), im2col_shape, im2col.get());
+ out_shape, out.getData(), im2col_shape, temporary.getData());
}
void depthwiseConv2d(Tensor &out, const char *params, const Tensor &in)
*/
mir::Operation*
fillGraph(mir::Graph& g,
- function<mir::Operation*(mir::Graph& g, vector<mir::IODescriptor>& inputs)> op_gen,
+ const function<mir::Operation*(mir::Graph& g, vector<mir::IODescriptor>& inputs)>& op_gen,
const vector<unique_ptr<mir::TensorVariant>>& input_ntensors) {
// Create inputs
std::vector<mir::IODescriptor> inputs;
void fillNTensor(mir::TensorVariant &dst, float start) {
float t = start;
mir::Tensor<float> wrapper(dst);
- for (mir::Index idx: mir::ShapeRange(dst.getShape())) {
+ for (const mir::Index& idx: mir::ShapeRange(dst.getShape())) {
wrapper.at(idx) = sin(t) * 2.0f;
t += 1.0f;
}
*/
template <class TestFunc, class ...Args>
void createAndRunTestGraph(
- function<mir::Operation*(mir::Graph &,
- const std::vector<mir::IODescriptor>& inputs)> op_generator,
- TestFunc artifactOperation,
- const vector<unique_ptr<mir::TensorVariant>> &input_ntensors,
- const Args &...input_atensors) {
+ function<mir::Operation*(mir::Graph&,
+ const std::vector<mir::IODescriptor>& inputs)> op_generator,
+ TestFunc artifactOperation,
+ const vector<unique_ptr<mir::TensorVariant>>& input_ntensors,
+ Args& ...input_atensors) { // non-const: the shared temporary tensor is written to
mir::Graph g;
mir::Operation *actual_operation = fillGraph(g, op_generator, input_ntensors);
// stride width, stride height
// size 3 is chosen to cover all cases, where width bigger/smaller then height and equal/not equal to 1
using iT = int32_t;
+ // shared im2col buffer, sized to cover the largest case generated by the loops below
+ Tensor temporary(Shape({1024 * 40}));
for (iT kernel_h = 2; kernel_h <= 4; ++kernel_h)
for (iT kernel_w = 2; kernel_w <= 4; ++kernel_w)
for (iT input_c = 1; input_c <= 3; ++input_c)
return g.create<mir::ops::DeConv2DOp>("y", inputs[0], kernel, strides, pad_t);
};
- createAndRunTestGraph(op_generator, convTransposed2d, input_ntensors, input_atensor);
+ createAndRunTestGraph(op_generator, convTransposed2d, input_ntensors, input_atensor,
+ temporary);
}
}
// stride width, stride height
// size 3 is chosen to cover all cases, where width bigger/smaller then height and equal/not equal to 1
using iT = int32_t;
+ // shared im2col buffer, sized to cover the largest case generated by the loops below
+ Tensor temporary(Shape({1024 * 20}));
for (iT kernel_h = 1; kernel_h <= 3; ++kernel_h)
for (iT kernel_w = 1; kernel_w <= 3; ++kernel_w)
for (iT input_c = 1; input_c <= 3; ++input_c)
padding);
};
- createAndRunTestGraph(op_generator, conv2d, input_ntensors, input_atensor);
+ createAndRunTestGraph(op_generator, conv2d, input_ntensors, input_atensor, temporary);
}
}