Rework support for constant tensors so that it (almost) does not depend on concrete models.
Signed-off-by: Sergei Barannikov <s.barannikov@samsung.com>
void Caffe2Importer::import()
{
- _net = stdex::make_unique<NetDef>();
- loadModelFile(_predictNet, _net.get());
+ _predict_net = stdex::make_unique<NetDef>();
+ loadModelFile(_predictNet, _predict_net.get());
- auto net2 = stdex::make_unique<NetDef>();
- loadModelFile(_initNet, net2.get());
- _net->MergeFrom(*net2);
+ _init_net = stdex::make_unique<NetDef>();
+ loadModelFile(_initNet, _init_net.get());
collectUnsupportedOps();
-
- preloadAllTensors();
}
std::unique_ptr<mir::Graph> Caffe2Importer::createIR()
{
- for (auto &op : _net->op())
+ // Load initializers.
+ for (const auto &op : _init_net->op())
+ createMIRNodesFromOp(op);
+
+ // Create inputs. This has to be done after processing initializers, because they may contain
+ // fake inputs.
+ // TODO Caffe2 does not provide a way to detect model inputs. For now assume that the first input
+ // of the first operation is the only input to the model.
+ const auto &input_name = _predict_net->op(0).input(0);
+ _blobNameToOutput[input_name] = _opCreator->createInput(input_name, _inputShapes[0]);
+
+ for (const auto &op : _predict_net->op())
createMIRNodesFromOp(op);
setGraphOutputs();
void Caffe2Importer::collectUnsupportedOps()
{
std::set<std::string> unsupportedOps;
- for (auto &op : _net->op())
+ for (const auto &op : _predict_net->op())
{
if (_operatorTypes.find(op.type()) == _operatorTypes.end())
unsupportedOps.insert(op.type());
}
}
-void Caffe2Importer::preloadAllTensors()
-{
- for (auto &op : _net->op())
- {
- // All tensor values are stored in 'GivenTensorFill' and 'ConstantFill' operators, so skip rest
- auto opType = _operatorTypes.at(op.type());
- if ((opType == SupportedCaffe2OpType::givenTensorFill ||
- opType == SupportedCaffe2OpType::constantFill ||
- opType == SupportedCaffe2OpType::givenTensorInt64Fill) &&
- hasArgument(op.arg(), "values"))
- {
- _MIRTensors.insert(std::make_pair(op.output(0), createTensor(op)));
- }
- }
-}
-
void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op)
{
std::vector<mir::Operation::Output *> outputs;
- // If op input not met yet - consider it as model input
- if (op.input_size() > 0 && _blobNameToOutput.find(op.input(0)) == _blobNameToOutput.end())
- {
-
- outputs = _opCreator->createInput(op.input(0), _inputShapes.front());
- _blobNameToOutput[op.input(0)] = outputs.at(0);
-
- _inputShapes.erase(_inputShapes.begin(), _inputShapes.begin() + 1);
- }
auto inputs = getInputMIROps(op);
case SupportedCaffe2OpType::constantFill:
case SupportedCaffe2OpType::givenTensorFill:
case SupportedCaffe2OpType::givenTensorInt64Fill:
- return;
+ outputs = _opCreator->convertConstant(inputs, op);
+ break;
case SupportedCaffe2OpType::add:
- outputs = _opCreator->convertAdd(inputs, op, _MIRTensors);
+ outputs = _opCreator->convertAdd(inputs, op);
break;
case SupportedCaffe2OpType::averagePool:
outputs = _opCreator->convertAveragePool(inputs, op);
break;
case SupportedCaffe2OpType::conv:
- outputs = _opCreator->convertConv(inputs, op, _MIRTensors);
+ outputs = _opCreator->convertConv(inputs, op);
break;
case SupportedCaffe2OpType::concat:
outputs = _opCreator->convertConcat(inputs, op);
outputs = _opCreator->convertDropout(inputs, op);
break;
case SupportedCaffe2OpType::FC:
- outputs = _opCreator->convertFullyConnected(inputs, op, _MIRTensors);
+ outputs = _opCreator->convertFC(inputs, op);
break;
case SupportedCaffe2OpType::maxPool:
outputs = _opCreator->convertMaxPool(inputs, op);
break;
case SupportedCaffe2OpType::mul:
- outputs = _opCreator->convertMul(inputs, op, _MIRTensors);
+ outputs = _opCreator->convertMul(inputs, op);
break;
case SupportedCaffe2OpType::relu:
outputs = _opCreator->convertRelu(inputs);
outputs = _opCreator->convertSoftmax(inputs, op);
break;
case SupportedCaffe2OpType::spatialBN:
- outputs = _opCreator->convertSpatialBN(inputs, op, _MIRTensors);
+ outputs = _opCreator->convertSpatialBN(inputs, op);
break;
case SupportedCaffe2OpType::sum:
outputs = _opCreator->convertSum(inputs);
outputs = _opCreator->convertClip(inputs, op);
break;
case SupportedCaffe2OpType::reshape:
- outputs = _opCreator->convertReshape(inputs, op, _MIRTensors);
+ outputs = _opCreator->convertReshape(inputs, op);
break;
default:
assert(false && "All unsupported types should have been found before this pass.");
_blobNameToOutput[op.output(i)] = outputs.at(i);
}
- _lastMIROp = outputs.at(0)->getNode();
-}
-
-mir::TensorVariant Caffe2Importer::createTensor(const OperatorDef &op)
-{
- assert(hasArgument(op.arg(), "shape") && hasArgument(op.arg(), "values"));
-
- const auto &shape = findArgumentByName(op.arg(), "shape");
- const auto &values = findArgumentByName(op.arg(), "values");
-
- mir::DTYPE element_type;
- const SupportedCaffe2OpType op_type = _operatorTypes.at(op.type());
- const void *src_data;
- // if values on floats
- if (!values.floats().empty())
- {
- element_type = mir::DTYPE::FLOAT32;
- src_data = values.floats().data();
- }
- else
+ // `outputs` can be empty if constant input was not processed.
+ if (!outputs.empty())
{
- assert(!values.ints().empty());
- if (op_type == SupportedCaffe2OpType::givenTensorInt64Fill)
- {
- element_type = mir::DTYPE::INT64;
- }
- else
- {
- element_type = mir::DTYPE::INT32;
- }
- src_data = values.ints().data();
+ _lastMIROp = outputs.at(0)->getNode();
}
-
- mir::Shape tensor_shape(shape.ints_size());
-
- for (int i = 0; i < shape.ints_size(); ++i)
- {
- tensor_shape.dim(i) = shape.ints(i);
- }
-
- return mir::TensorVariant(element_type, tensor_shape, src_data);
}
std::vector<mir::Operation::Output *> Caffe2Importer::getInputMIROps(const OperatorDef &op)
{
- // caffe2 operation inputs not same as MIR inputs (ex: in caffe2 conv kernel and bias also inputs)
- // so choose caffe2 inputs, which are 'real' inputs
std::vector<mir::Operation::Output *> inputs;
- SupportedCaffe2OpType opType = _operatorTypes.at(op.type());
- if (opType != SupportedCaffe2OpType::givenTensorFill &&
- opType != SupportedCaffe2OpType::constantFill &&
- opType != SupportedCaffe2OpType::givenTensorInt64Fill)
+
+ for (const auto &input_name : op.input())
{
- for (auto &i : op.input())
- if (_blobNameToOutput.find(i) != _blobNameToOutput.end())
- inputs.push_back(_blobNameToOutput[i]);
+ if (_blobNameToOutput.find(input_name) == _blobNameToOutput.end())
+ throw std::runtime_error("Cannot find blob \"" + input_name + "\".");
+ inputs.push_back(_blobNameToOutput[input_name]);
}
return inputs;
std::string _predictNet;
std::string _initNet;
std::unique_ptr<mir::Graph> _graph;
- std::unique_ptr<::caffe2::NetDef> _net;
+ std::unique_ptr<caffe2::NetDef> _predict_net;
+ std::unique_ptr<caffe2::NetDef> _init_net;
std::unique_ptr<Caffe2OpCreator> _opCreator;
std::vector<mir::Shape> _inputShapes;
std::unordered_map<std::string, mir::Operation::Output *> _blobNameToOutput;
mir::Operation *_lastMIROp = nullptr;
- std::map<std::string, mir::TensorVariant> _MIRTensors;
-
void import();
std::unique_ptr<mir::Graph> createIR();
void createMIRNodesFromOp(const ::caffe2::OperatorDef &op);
/**
- * @brief Since caffe2 tensor values stored separately (in init_net) - preload them in _MIRTensors
- */
- void preloadAllTensors();
-
- /**
- * @brief Creates MIR tensor from caffe2 givenTensorFill op
- */
- mir::TensorVariant createTensor(const ::caffe2::OperatorDef &op);
-
- /**
* @brief Returns MIR operation outputs corresponding to the inputs of the given operator.
*/
std::vector<mir::Operation::Output *> getInputMIROps(const ::caffe2::OperatorDef &op);
#include "mir/TensorUtil.h"
#include <cmath>
-#include <set>
#include <vector>
namespace nnc
"If one custom kernel size specified - all custom kernel sizes must be specified");
}
+static mir::TensorVariant createTensor(const OperatorDef &op)
+{
+ assert(hasArgument(op.arg(), "shape") && hasArgument(op.arg(), "values"));
+
+ const auto &shape = findArgumentByName(op.arg(), "shape");
+ const auto &values = findArgumentByName(op.arg(), "values");
+
+ mir::DTYPE element_type;
+ const void *src_data;
+  // if values are floats
+ if (!values.floats().empty())
+ {
+ element_type = mir::DTYPE::FLOAT32;
+ src_data = values.floats().data();
+ }
+ else
+ {
+ assert(!values.ints().empty());
+ if (op.type() == "GivenTensorInt64Fill")
+ {
+ element_type = mir::DTYPE::INT64;
+ }
+ else
+ {
+ element_type = mir::DTYPE::INT32;
+ }
+ src_data = values.ints().data();
+ }
+
+ mir::Shape tensor_shape(shape.ints_size());
+
+ for (int i = 0; i < shape.ints_size(); ++i)
+ {
+ tensor_shape.dim(i) = shape.ints(i);
+ }
+
+ return mir::TensorVariant(element_type, tensor_shape, src_data);
+}
+
//
// Convert functions
//
std::vector<mir::Operation::Output *>
-Caffe2OpCreator::convertAdd(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors)
+Caffe2OpCreator::convertConstant(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
{
- checkLayout(op);
+ // Constant may not contain any data if it is a fake input.
+ if (!hasArgument(op.arg(), "values"))
+ return {};
- std::vector<mir::Operation::Output *> add_input;
- add_input.reserve(inputs.size() + 1);
- for (const auto &i : inputs)
- add_input.emplace_back(convertCaffeToMIR(i));
+ return {createOp<ops::ConstantOp>("", createTensor(op))->getOutput(0)};
+}
- // check mir tensors contain operand
- if (mir_tensors.find(op.input(1)) != mir_tensors.end())
+std::vector<mir::Operation::Output *>
+Caffe2OpCreator::convertAdd(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
+{
+ if (getSingleArgument(op, "broadcast", 0) != 0)
{
- auto next_input = createOp<ops::ConstantOp>("Constant", mir_tensors.at(op.input(1)));
- add_input.emplace_back(next_input->getOutput(0));
- }
+ // FIXME This only works when 'axis' == 1 and the second input is 1-D.
+ std::vector<mir::Operation::Output *> transposed_inputs{convertCaffeToMIR(inputs[0]),
+ inputs[1]};
+ auto result = createOp<ops::ElementwiseOp>("Elementwise_Add", transposed_inputs,
+ ops::ElementwiseOp::OpType::add);
- auto add =
- createOp<ops::ElementwiseOp>("Elementwise_Add", add_input, ops::ElementwiseOp::OpType::add);
+ return {convertMIRToCaffe(result->getOutput(0))};
+ }
- return {convertMIRToCaffe(add->getOutput(0))};
+ auto result =
+ createOp<ops::ElementwiseOp>("Elementwise_Add", inputs, ops::ElementwiseOp::OpType::add);
+ return {result->getOutput(0)};
}
-
std::vector<mir::Operation::Output *>
Caffe2OpCreator::convertAveragePool(const std::vector<mir::Operation::Output *> &inputs,
const OperatorDef &op)
std::vector<mir::Operation::Output *>
Caffe2OpCreator::convertConv(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors)
+ const ::caffe2::OperatorDef &op)
{
- checkConvLikeOp(op);
-
// dilation order: h w (not used)
Shape stride_shape(getStrides(op));
std::vector<int32_t> pad_before, pad_after;
std::tie(pad_before, pad_after) = getPadding(op);
- auto kernel_tensor = transposeTensor<2, 3, 1, 0>(mir_tensors.at(op.input(1)));
+ auto kernel_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ if (kernel_op == nullptr)
+ throw std::runtime_error("Conv: non-constant kernels is not supported yet.");
+
+ // [M, C, kH, kW] -> [kH, kW, C, M]
+ auto kernel_tensor = transposeTensor<2, 3, 1, 0>(kernel_op->getValue());
auto in_group_size = kernel_tensor.getShape().dim(2);
auto out_channels = kernel_tensor.getShape().dim(3);
int num_groups = getSingleArgument(op, "group", 1);
}
if (op.input_size() > 2)
- { // Bias is optional
- auto bias = createOp<ops::ConstantOp>("Constant", mir_tensors.at(op.input(2)))->getOutput(0);
- result = createOp<ops::BiasAddOp>("Bias_Add", result->getOutput(0), bias);
+ {
+ result = createOp<ops::BiasAddOp>("Bias_Add", result->getOutput(0), inputs[2]);
}
return {convertMIRToCaffe(result->getOutput(0))};
{
checkLayout(op);
+ // `1` corresponds to the default (channels) axis.
int axis = getSingleArgument(op, "axis", 1);
auto result = createOp<ops::ConcatOp>("Concat", inputs, axis);
return {result->getOutput(0)};
}
std::vector<mir::Operation::Output *>
-Caffe2OpCreator::convertFullyConnected(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op,
- const MIRTensors &mir_tensors)
+Caffe2OpCreator::convertFC(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op)
{
for (auto &s : {"axis", "axis_w", "float16_compute"})
if (hasArgument(op.arg(), s))
throw std::runtime_error(std::string("FC: only default '") + s + "' value is supported");
- auto weights_tensor = transposeTensor<1, 0>(mir_tensors.at(op.input(1)));
+ auto weights_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ if (weights_op == nullptr)
+ throw std::runtime_error("FC: non-constant weights is not supported yet.");
+
+ auto weights_tensor = transposeTensor<1, 0>(weights_op->getValue());
const auto &input_shape = inputs[0]->getShape();
// Transform input into 2-D tensor by flattening axes
auto reshape = createOp<ops::ReshapeOp>("Reshape", inputs[0], shape);
auto weights = createOp<ops::ConstantOp>("Constant", weights_tensor)->getOutput(0);
auto result = createOp<ops::FullyConnectedOp>("Fully_Connected", reshape->getOutput(0), weights);
- auto bias = createOp<ops::ConstantOp>("Constant", mir_tensors.at(op.input(2)))->getOutput(0);
- result = createOp<ops::BiasAddOp>("Bias_Add", result->getOutput(0), bias);
+ result = createOp<ops::BiasAddOp>("Bias_Add", result->getOutput(0), inputs[2]);
return {result->getOutput(0)};
}
std::vector<mir::Operation::Output *>
Caffe2OpCreator::convertMul(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors)
+ const ::caffe2::OperatorDef &op)
{
- checkLayout(op);
-
- std::vector<mir::Operation::Output *> input_descriptors;
- input_descriptors.reserve(inputs.size() + 1);
- for (const auto &i : inputs)
- input_descriptors.emplace_back(convertCaffeToMIR(i));
-
- // TODO: replace ConstantOp on inputs
- if (mir_tensors.find(op.input(1)) != mir_tensors.end())
+ if (getSingleArgument(op, "broadcast", 0) != 0)
{
- auto const_tensor = createOp<ops::ConstantOp>("Constant", mir_tensors.at(op.input(1)));
- input_descriptors.emplace_back(const_tensor->getOutput(0));
- }
+ // FIXME This only works when `axis` == 1 and the second input is 1-D.
+ std::vector<mir::Operation::Output *> transposed_inputs{convertCaffeToMIR(inputs[0]),
+ inputs[1]};
+ auto result = createOp<ops::ElementwiseOp>("Elementwise_Mul", transposed_inputs,
+ ops::ElementwiseOp::OpType::mul);
- auto mul = createOp<ops::ElementwiseOp>("Elementwise_Mul", input_descriptors,
- ops::ElementwiseOp::OpType::mul);
+ return {convertMIRToCaffe(result->getOutput(0))};
+ }
- return {convertMIRToCaffe(mul->getOutput(0))};
+ auto result =
+ createOp<ops::ElementwiseOp>("Elementwise_Mul", inputs, ops::ElementwiseOp::OpType::mul);
+ return {result->getOutput(0)};
}
std::vector<mir::Operation::Output *>
std::vector<mir::Operation::Output *>
Caffe2OpCreator::convertSpatialBN(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors)
+ const ::caffe2::OperatorDef &op)
{
checkLayout(op);
// overall_res = (X - mean) / sqrt(var + epsilon) * scale + bias
- const auto &scale_tensor = mir_tensors.at(op.input(1));
- const auto &bias_tensor = mir_tensors.at(op.input(2));
- const auto &mean_tensor = mir_tensors.at(op.input(3));
- const auto &var_tensor = mir_tensors.at(op.input(4));
+ auto scale_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ auto bias_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[2]->getNode());
+ auto mean_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[3]->getNode());
+ auto var_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[4]->getNode());
+ if (scale_op == nullptr || bias_op == nullptr || mean_op == nullptr || var_op == nullptr)
+ throw std::runtime_error(
+ "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet.");
+
+ const auto &scale_tensor = scale_op->getValue();
+ const auto &bias_tensor = bias_op->getValue();
+ const auto &mean_tensor = mean_op->getValue();
+ const auto &var_tensor = var_op->getValue();
float eps = getSingleArgument(op, "epsilon", 1e-5f);
// res1 = X - mean
std::vector<mir::Operation::Output *>
Caffe2OpCreator::convertReshape(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors)
+ const ::caffe2::OperatorDef &op)
{
- // Check new shape input
- assert(mir_tensors.find(op.input(1)) != mir_tensors.end());
- const auto &shape_tensor = mir_tensors.at(op.input(1));
+ auto shape_op = dynamic_cast<mir::ops::ConstantOp *>(inputs[1]->getNode());
+ if (shape_op == nullptr)
+ throw std::runtime_error("Reshape: non-constant shape is not supported yet.");
+
+ const auto &shape_tensor = shape_op->getValue();
Tensor<int64_t> out_shape_tensor(shape_tensor);
return {reshape->getOutput(0)};
}
-std::vector<mir::Operation::Output *> Caffe2OpCreator::createInput(const std::string &name,
- const mir::Shape &shape)
+Operation::Output *Caffe2OpCreator::createInput(const std::string &name, const mir::Shape &shape)
{
- auto variable = _graph->create<ops::InputOp>(name, shape);
- return {variable->getOutput(0)};
+ return _graph->create<ops::InputOp>(name, shape)->getOutput(0);
}
} // namespace nnc
namespace nnc
{
-using mir::Graph;
using mir::Operation;
using mir::Shape;
-using MIRTensors = const std::map<std::string, mir::TensorVariant>;
class Caffe2OpCreator
{
public:
- explicit Caffe2OpCreator(Graph *g) : _graph(g){};
+ explicit Caffe2OpCreator(mir::Graph *g) : _graph(g) {}
- std::vector<mir::Operation::Output *> createInput(const std::string &name,
- const mir::Shape &shape);
+ Operation::Output *createInput(const std::string &name, const mir::Shape &shape);
std::vector<mir::Operation::Output *>
- convertAdd(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op,
- const MIRTensors &mir_tensors);
+ convertConstant(const std::vector<mir::Operation::Output *> &inputs,
+ const ::caffe2::OperatorDef &op);
+
+ std::vector<mir::Operation::Output *>
+ convertAdd(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
convertAveragePool(const std::vector<mir::Operation::Output *> &inputs,
const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
- convertConv(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op,
- const MIRTensors &mir_tensors);
+ convertConv(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
convertConcat(const std::vector<mir::Operation::Output *> &inputs,
const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
- convertFullyConnected(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors);
+ convertFC(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
convertMaxPool(const std::vector<mir::Operation::Output *> &inputs,
const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
- convertMul(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op,
- const MIRTensors &mir_tensors);
+ convertMul(const std::vector<mir::Operation::Output *> &inputs, const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
convertRelu(const std::vector<mir::Operation::Output *> &inputs);
std::vector<mir::Operation::Output *>
convertSpatialBN(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors);
+ const ::caffe2::OperatorDef &op);
std::vector<mir::Operation::Output *>
convertSum(const std::vector<mir::Operation::Output *> &inputs);
std::vector<mir::Operation::Output *>
convertReshape(const std::vector<mir::Operation::Output *> &inputs,
- const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors);
+ const ::caffe2::OperatorDef &op);
private:
- Graph *_graph = nullptr;
+ mir::Graph *_graph = nullptr;
mir::Operation::Output *convertCaffeToMIR(mir::Operation::Output *arg);