From 5bad3a6cc7ecebcc06973bc4cfaebcdf7a49e833 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov/AI Tools Lab /SRR/Engineer/Samsung Electronics Date: Fri, 2 Aug 2019 22:02:28 +0300 Subject: [PATCH] [mir/Caffe2 importer] Improve support for constant tensors (#6176) Rework support for constant tensors so that it is (almost) independent of concrete models. Signed-off-by: Sergei Barannikov --- compiler/mir-caffe2-importer/caffe2_importer.cpp | 122 ++++----------- compiler/mir-caffe2-importer/caffe2_importer.h | 15 +- compiler/mir-caffe2-importer/caffe2_op_creator.cpp | 171 ++++++++++++++------- compiler/mir-caffe2-importer/caffe2_op_creator.h | 29 ++-- 4 files changed, 160 insertions(+), 177 deletions(-) diff --git a/compiler/mir-caffe2-importer/caffe2_importer.cpp b/compiler/mir-caffe2-importer/caffe2_importer.cpp index 7d6bd71..1d09a9e 100644 --- a/compiler/mir-caffe2-importer/caffe2_importer.cpp +++ b/compiler/mir-caffe2-importer/caffe2_importer.cpp @@ -79,21 +79,29 @@ static void loadModelFile(const std::string &filename, caffe2::NetDef *net) void Caffe2Importer::import() { - _net = stdex::make_unique(); - loadModelFile(_predictNet, _net.get()); + _predict_net = stdex::make_unique(); + loadModelFile(_predictNet, _predict_net.get()); - auto net2 = stdex::make_unique(); - loadModelFile(_initNet, net2.get()); - _net->MergeFrom(*net2); + _init_net = stdex::make_unique(); + loadModelFile(_initNet, _init_net.get()); collectUnsupportedOps(); - - preloadAllTensors(); } std::unique_ptr Caffe2Importer::createIR() { - for (auto &op : _net->op()) + // Load initializers. + for (const auto &op : _init_net->op()) + createMIRNodesFromOp(op); + + // Create inputs. This has to be done after processing initializers, because they may contain + // fake inputs. + // TODO Caffe2 does not provide a way to detect model inputs. For now assume that the first input + of the first operation is the only input to the model. 
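+ // For example (blob names here are purely illustrative): if the first operator of the + // predict_net is a Conv with inputs ("data", "conv1_w", "conv1_b"), then "data" becomes the + // model input; the weight blobs were already registered while processing the init_net above.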
+ const auto &input_name = _predict_net->op(0).input(0); + _blobNameToOutput[input_name] = _opCreator->createInput(input_name, _inputShapes[0]); + + for (const auto &op : _predict_net->op()) createMIRNodesFromOp(op); setGraphOutputs(); @@ -110,7 +118,7 @@ std::unique_ptr Caffe2Importer::importModel() void Caffe2Importer::collectUnsupportedOps() { std::set unsupportedOps; - for (auto &op : _net->op()) + for (const auto &op : _predict_net->op()) { if (_operatorTypes.find(op.type()) == _operatorTypes.end()) unsupportedOps.insert(op.type()); @@ -125,34 +133,9 @@ void Caffe2Importer::collectUnsupportedOps() } } -void Caffe2Importer::preloadAllTensors() -{ - for (auto &op : _net->op()) - { - // All tensor values are stored in 'GivenTensorFill' and 'ConstantFill' operators, so skip rest - auto opType = _operatorTypes.at(op.type()); - if ((opType == SupportedCaffe2OpType::givenTensorFill || - opType == SupportedCaffe2OpType::constantFill || - opType == SupportedCaffe2OpType::givenTensorInt64Fill) && - hasArgument(op.arg(), "values")) - { - _MIRTensors.insert(std::make_pair(op.output(0), createTensor(op))); - } - } -} - void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op) { std::vector outputs; - // If op input not met yet - consider it as model input - if (op.input_size() > 0 && _blobNameToOutput.find(op.input(0)) == _blobNameToOutput.end()) - { - - outputs = _opCreator->createInput(op.input(0), _inputShapes.front()); - _blobNameToOutput[op.input(0)] = outputs.at(0); - - _inputShapes.erase(_inputShapes.begin(), _inputShapes.begin() + 1); - } auto inputs = getInputMIROps(op); @@ -162,15 +145,16 @@ void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op) case SupportedCaffe2OpType::constantFill: case SupportedCaffe2OpType::givenTensorFill: case SupportedCaffe2OpType::givenTensorInt64Fill: - return; + outputs = _opCreator->convertConstant(inputs, op); + break; case SupportedCaffe2OpType::add: - outputs = _opCreator->convertAdd(inputs, op, _MIRTensors); + outputs = _opCreator->convertAdd(inputs, op); break; case SupportedCaffe2OpType::averagePool: outputs = _opCreator->convertAveragePool(inputs, op); break; case SupportedCaffe2OpType::conv: - outputs = _opCreator->convertConv(inputs, op, _MIRTensors); + outputs = _opCreator->convertConv(inputs, op); break; case SupportedCaffe2OpType::concat: outputs = _opCreator->convertConcat(inputs, op); @@ -179,13 +163,13 @@ void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op) outputs = _opCreator->convertDropout(inputs, op); break; case SupportedCaffe2OpType::FC: - outputs = _opCreator->convertFullyConnected(inputs, op, _MIRTensors); + outputs = _opCreator->convertFC(inputs, op); break; case SupportedCaffe2OpType::maxPool: outputs = _opCreator->convertMaxPool(inputs, op); break; case SupportedCaffe2OpType::mul: - outputs = _opCreator->convertMul(inputs, op, _MIRTensors); + outputs = _opCreator->convertMul(inputs, op); break; case SupportedCaffe2OpType::relu: outputs = _opCreator->convertRelu(inputs); @@ -200,7 +184,7 @@ void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op) outputs = _opCreator->convertSoftmax(inputs, op); break; case SupportedCaffe2OpType::spatialBN: - outputs = _opCreator->convertSpatialBN(inputs, op, _MIRTensors); + outputs = _opCreator->convertSpatialBN(inputs, op); break; case SupportedCaffe2OpType::sum: outputs = _opCreator->convertSum(inputs); @@ -209,7 +193,7 @@ void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op) outputs = _opCreator->convertClip(inputs, op); break; case 
SupportedCaffe2OpType::reshape: - outputs = _opCreator->convertReshape(inputs, op, _MIRTensors); + outputs = _opCreator->convertReshape(inputs, op); break; default: assert(false && "All unsupported types should have been found before this pass."); @@ -222,62 +206,22 @@ void Caffe2Importer::createMIRNodesFromOp(const OperatorDef &op) _blobNameToOutput[op.output(i)] = outputs.at(i); } - _lastMIROp = outputs.at(0)->getNode(); -} - -mir::TensorVariant Caffe2Importer::createTensor(const OperatorDef &op) -{ - assert(hasArgument(op.arg(), "shape") && hasArgument(op.arg(), "values")); - - const auto &shape = findArgumentByName(op.arg(), "shape"); - const auto &values = findArgumentByName(op.arg(), "values"); - - mir::DTYPE element_type; - const SupportedCaffe2OpType op_type = _operatorTypes.at(op.type()); - const void *src_data; - // if values on floats - if (!values.floats().empty()) - { - element_type = mir::DTYPE::FLOAT32; - src_data = values.floats().data(); - } - else + // `outputs` can be empty if constant input was not processed. + if (!outputs.empty()) { - assert(!values.ints().empty()); - if (op_type == SupportedCaffe2OpType::givenTensorInt64Fill) - { - element_type = mir::DTYPE::INT64; - } - else - { - element_type = mir::DTYPE::INT32; - } - src_data = values.ints().data(); + _lastMIROp = outputs.at(0)->getNode(); } - - mir::Shape tensor_shape(shape.ints_size()); - - for (int i = 0; i < shape.ints_size(); ++i) - { - tensor_shape.dim(i) = shape.ints(i); - } - - return mir::TensorVariant(element_type, tensor_shape, src_data); } std::vector Caffe2Importer::getInputMIROps(const OperatorDef &op) { - // caffe2 operation inputs not same as MIR inputs (ex: in caffe2 conv kernel and bias also inputs) - // so choose caffe2 inputs, which are 'real' inputs std::vector inputs; - SupportedCaffe2OpType opType = _operatorTypes.at(op.type()); - if (opType != SupportedCaffe2OpType::givenTensorFill && - opType != SupportedCaffe2OpType::constantFill && - opType != SupportedCaffe2OpType::givenTensorInt64Fill) + + for (const auto &input_name : op.input()) { - for (auto &i : op.input()) - if (_blobNameToOutput.find(i) != _blobNameToOutput.end()) - inputs.push_back(_blobNameToOutput[i]); + if (_blobNameToOutput.find(input_name) == _blobNameToOutput.end()) + throw std::runtime_error("Cannot find blob \"" + input_name + "\"."); + inputs.push_back(_blobNameToOutput[input_name]); } return inputs; diff --git a/compiler/mir-caffe2-importer/caffe2_importer.h b/compiler/mir-caffe2-importer/caffe2_importer.h index 5da1199..cd7ad6e 100644 --- a/compiler/mir-caffe2-importer/caffe2_importer.h +++ b/compiler/mir-caffe2-importer/caffe2_importer.h @@ -43,7 +43,8 @@ private: std::string _predictNet; std::string _initNet; std::unique_ptr _graph; - std::unique_ptr<::caffe2::NetDef> _net; + std::unique_ptr _predict_net; + std::unique_ptr _init_net; std::unique_ptr _opCreator; std::vector _inputShapes; @@ -53,8 +54,6 @@ private: std::unordered_map _blobNameToOutput; mir::Operation *_lastMIROp = nullptr; - std::map _MIRTensors; - void import(); std::unique_ptr createIR(); @@ -70,16 +69,6 @@ private: void createMIRNodesFromOp(const ::caffe2::OperatorDef &op); /** - * @brief Since caffe2 tensor values stored separately (in init_net) - preload them in _MIRTensors - */ - void preloadAllTensors(); - - /** - * @brief Creates MIR tensor from caffe2 givenTensorFill op - */ - mir::TensorVariant createTensor(const ::caffe2::OperatorDef &op); - - /** * @brief Returns MIR operation outputs corresponding to the inputs of the given operator. 
*/ std::vector getInputMIROps(const ::caffe2::OperatorDef &op); diff --git a/compiler/mir-caffe2-importer/caffe2_op_creator.cpp b/compiler/mir-caffe2-importer/caffe2_op_creator.cpp index 684f867..038f746 100644 --- a/compiler/mir-caffe2-importer/caffe2_op_creator.cpp +++ b/compiler/mir-caffe2-importer/caffe2_op_creator.cpp @@ -44,7 +44,6 @@ #include "mir/TensorUtil.h" #include -#include #include namespace nnc @@ -273,34 +272,79 @@ static void checkConvLikeOp(const ::caffe2::OperatorDef &op) "If one custom kernel size specified - all custom kernel sizes must be specified"); } +static mir::TensorVariant createTensor(const OperatorDef &op) +{ + assert(hasArgument(op.arg(), "shape") && hasArgument(op.arg(), "values")); + + const auto &shape = findArgumentByName(op.arg(), "shape"); + const auto &values = findArgumentByName(op.arg(), "values"); + + mir::DTYPE element_type; + const void *src_data; + // if values on floats + if (!values.floats().empty()) + { + element_type = mir::DTYPE::FLOAT32; + src_data = values.floats().data(); + } + else + { + assert(!values.ints().empty()); + if (op.type() == "GivenTensorInt64Fill") + { + element_type = mir::DTYPE::INT64; + } + else + { + element_type = mir::DTYPE::INT32; + } + src_data = values.ints().data(); + } + + mir::Shape tensor_shape(shape.ints_size()); + + for (int i = 0; i < shape.ints_size(); ++i) + { + tensor_shape.dim(i) = shape.ints(i); + } + + return mir::TensorVariant(element_type, tensor_shape, src_data); +} + // // Convert functions // std::vector -Caffe2OpCreator::convertAdd(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors) +Caffe2OpCreator::convertConstant(const std::vector &inputs, + const ::caffe2::OperatorDef &op) { - checkLayout(op); + // Constant may not contain any data if it is a fake input. + if (!hasArgument(op.arg(), "values")) + return {}; - std::vector add_input; - add_input.reserve(inputs.size() + 1); - for (const auto &i : inputs) - add_input.emplace_back(convertCaffeToMIR(i)); + return {createOp("", createTensor(op))->getOutput(0)}; +} - // check mir tensors contain operand - if (mir_tensors.find(op.input(1)) != mir_tensors.end()) +std::vector +Caffe2OpCreator::convertAdd(const std::vector &inputs, + const ::caffe2::OperatorDef &op) +{ + if (getSingleArgument(op, "broadcast", 0) != 0) { - auto next_input = createOp("Constant", mir_tensors.at(op.input(1))); - add_input.emplace_back(next_input->getOutput(0)); - } + // FIXME This only works when 'axis' == 1 and the second input is 1-D. 
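+ // With 'broadcast' == 1 and the default 'axis' == 1, the second operand is a 1-D tensor over + // the channel dimension; the first operand is transposed to NHWC below so that the element-wise + // operation lines the 1-D operand up with the (now innermost) channel dimension.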
+ std::vector transposed_inputs{convertCaffeToMIR(inputs[0]), + inputs[1]}; + auto result = createOp("Elementwise_Add", transposed_inputs, + ops::ElementwiseOp::OpType::add); - auto add = - createOp("Elementwise_Add", add_input, ops::ElementwiseOp::OpType::add); + return {convertMIRToCaffe(result->getOutput(0))}; + } - return {convertMIRToCaffe(add->getOutput(0))}; + auto result = + createOp("Elementwise_Add", inputs, ops::ElementwiseOp::OpType::add); + return {result->getOutput(0)}; } - std::vector Caffe2OpCreator::convertAveragePool(const std::vector &inputs, const OperatorDef &op) @@ -325,17 +369,20 @@ Caffe2OpCreator::convertAveragePool(const std::vector std::vector Caffe2OpCreator::convertConv(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors) + const ::caffe2::OperatorDef &op) { - checkConvLikeOp(op); - // dilation order: h w (not used) Shape stride_shape(getStrides(op)); std::vector pad_before, pad_after; std::tie(pad_before, pad_after) = getPadding(op); - auto kernel_tensor = transposeTensor<2, 3, 1, 0>(mir_tensors.at(op.input(1))); + auto kernel_op = dynamic_cast(inputs[1]->getNode()); + if (kernel_op == nullptr) + throw std::runtime_error("Conv: non-constant kernels is not supported yet."); + + // [M, C, kH, kW] -> [kH, kW, C, M] + auto kernel_tensor = transposeTensor<2, 3, 1, 0>(kernel_op->getValue()); auto in_group_size = kernel_tensor.getShape().dim(2); auto out_channels = kernel_tensor.getShape().dim(3); int num_groups = getSingleArgument(op, "group", 1); @@ -362,9 +409,8 @@ Caffe2OpCreator::convertConv(const std::vector &inputs } if (op.input_size() > 2) - { // Bias is optional - auto bias = createOp("Constant", mir_tensors.at(op.input(2)))->getOutput(0); - result = createOp("Bias_Add", result->getOutput(0), bias); + { + result = createOp("Bias_Add", result->getOutput(0), inputs[2]); } return {convertMIRToCaffe(result->getOutput(0))}; @@ -376,6 +422,7 @@ Caffe2OpCreator::convertConcat(const std::vector &inpu { checkLayout(op); + // `1` corresponds to the default (channels) axis. 
int axis = getSingleArgument(op, "axis", 1); auto result = createOp("Concat", inputs, axis); return {result->getOutput(0)}; @@ -395,15 +442,18 @@ Caffe2OpCreator::convertDropout(const std::vector &inp } std::vector -Caffe2OpCreator::convertFullyConnected(const std::vector &inputs, - const ::caffe2::OperatorDef &op, - const MIRTensors &mir_tensors) +Caffe2OpCreator::convertFC(const std::vector &inputs, + const ::caffe2::OperatorDef &op) { for (auto &s : {"axis", "axis_w", "float16_compute"}) if (hasArgument(op.arg(), s)) throw std::runtime_error(std::string("FC: only default '") + s + "' value is supported"); - auto weights_tensor = transposeTensor<1, 0>(mir_tensors.at(op.input(1))); + auto weights_op = dynamic_cast(inputs[1]->getNode()); + if (weights_op == nullptr) + throw std::runtime_error("FC: non-constant weights is not supported yet."); + + auto weights_tensor = transposeTensor<1, 0>(weights_op->getValue()); const auto &input_shape = inputs[0]->getShape(); // Transform input into 2-D tensor by flattening axes @@ -412,8 +462,7 @@ Caffe2OpCreator::convertFullyConnected(const std::vector("Reshape", inputs[0], shape); auto weights = createOp("Constant", weights_tensor)->getOutput(0); auto result = createOp("Fully_Connected", reshape->getOutput(0), weights); - auto bias = createOp("Constant", mir_tensors.at(op.input(2)))->getOutput(0); - result = createOp("Bias_Add", result->getOutput(0), bias); + result = createOp("Bias_Add", result->getOutput(0), inputs[2]); return {result->getOutput(0)}; } @@ -441,26 +490,22 @@ Caffe2OpCreator::convertMaxPool(const std::vector &inp std::vector Caffe2OpCreator::convertMul(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors) + const ::caffe2::OperatorDef &op) { - checkLayout(op); - - std::vector input_descriptors; - input_descriptors.reserve(inputs.size() + 1); - for (const auto &i : inputs) - input_descriptors.emplace_back(convertCaffeToMIR(i)); - - // TODO: replace ConstantOp on inputs - if (mir_tensors.find(op.input(1)) != mir_tensors.end()) + if (getSingleArgument(op, "broadcast", 0) != 0) { - auto const_tensor = createOp("Constant", mir_tensors.at(op.input(1))); - input_descriptors.emplace_back(const_tensor->getOutput(0)); - } + // FIXME This only works when `axis` == 1 and the second input is 1-D. 
+ std::vector transposed_inputs{convertCaffeToMIR(inputs[0]), + inputs[1]}; + auto result = createOp("Elementwise_Mul", transposed_inputs, + ops::ElementwiseOp::OpType::mul); - auto mul = createOp("Elementwise_Mul", input_descriptors, - ops::ElementwiseOp::OpType::mul); + return {convertMIRToCaffe(result->getOutput(0))}; + } - return {convertMIRToCaffe(mul->getOutput(0))}; + auto result = + createOp("Elementwise_Mul", inputs, ops::ElementwiseOp::OpType::mul); + return {result->getOutput(0)}; } std::vector @@ -505,7 +550,7 @@ Caffe2OpCreator::convertSoftmax(const std::vector &inp std::vector Caffe2OpCreator::convertSpatialBN(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors) + const ::caffe2::OperatorDef &op) { checkLayout(op); @@ -518,10 +563,18 @@ Caffe2OpCreator::convertSpatialBN(const std::vector &i // overall_res = (X - mean) / sqrt(var + epsilon) * scale + bias - const auto &scale_tensor = mir_tensors.at(op.input(1)); - const auto &bias_tensor = mir_tensors.at(op.input(2)); - const auto &mean_tensor = mir_tensors.at(op.input(3)); - const auto &var_tensor = mir_tensors.at(op.input(4)); + auto scale_op = dynamic_cast(inputs[1]->getNode()); + auto bias_op = dynamic_cast(inputs[2]->getNode()); + auto mean_op = dynamic_cast(inputs[3]->getNode()); + auto var_op = dynamic_cast(inputs[4]->getNode()); + if (scale_op == nullptr || bias_op == nullptr || mean_op == nullptr || var_op == nullptr) + throw std::runtime_error( + "SpatialBN: non-constant 'scale', 'bias', 'mean' and 'var' inputs are not supported yet."); + + const auto &scale_tensor = scale_op->getValue(); + const auto &bias_tensor = bias_op->getValue(); + const auto &mean_tensor = mean_op->getValue(); + const auto &var_tensor = var_op->getValue(); float eps = getSingleArgument(op, "epsilon", 1e-5f); // res1 = X - mean @@ -574,11 +627,13 @@ Caffe2OpCreator::convertClip(const std::vector &inputs std::vector Caffe2OpCreator::convertReshape(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors) + const ::caffe2::OperatorDef &op) { - // Check new shape input - assert(mir_tensors.find(op.input(1)) != mir_tensors.end()); - const auto &shape_tensor = mir_tensors.at(op.input(1)); + auto shape_op = dynamic_cast(inputs[1]->getNode()); + if (shape_op == nullptr) + throw std::runtime_error("Reshape: non-constant shape is not supported yet."); + + const auto &shape_tensor = shape_op->getValue(); Tensor out_shape_tensor(shape_tensor); @@ -595,11 +650,9 @@ Caffe2OpCreator::convertReshape(const std::vector &inp return {reshape->getOutput(0)}; } -std::vector Caffe2OpCreator::createInput(const std::string &name, - const mir::Shape &shape) +Operation::Output *Caffe2OpCreator::createInput(const std::string &name, const mir::Shape &shape) { - auto variable = _graph->create(name, shape); - return {variable->getOutput(0)}; + return _graph->create(name, shape)->getOutput(0); } } // namespace nnc diff --git a/compiler/mir-caffe2-importer/caffe2_op_creator.h b/compiler/mir-caffe2-importer/caffe2_op_creator.h index f8e50d4..675e9a0 100644 --- a/compiler/mir-caffe2-importer/caffe2_op_creator.h +++ b/compiler/mir-caffe2-importer/caffe2_op_creator.h @@ -33,30 +33,29 @@ namespace nnc { -using mir::Graph; using mir::Operation; using mir::Shape; -using MIRTensors = const std::map; class Caffe2OpCreator { public: - explicit Caffe2OpCreator(Graph *g) : _graph(g){}; + explicit Caffe2OpCreator(mir::Graph *g) : _graph(g) {} - std::vector createInput(const std::string &name, - const mir::Shape 
&shape); + Operation::Output *createInput(const std::string &name, const mir::Shape &shape); std::vector - convertAdd(const std::vector &inputs, const ::caffe2::OperatorDef &op, - const MIRTensors &mir_tensors); + convertConstant(const std::vector &inputs, + const ::caffe2::OperatorDef &op); + + std::vector + convertAdd(const std::vector &inputs, const ::caffe2::OperatorDef &op); std::vector convertAveragePool(const std::vector &inputs, const ::caffe2::OperatorDef &op); std::vector - convertConv(const std::vector &inputs, const ::caffe2::OperatorDef &op, - const MIRTensors &mir_tensors); + convertConv(const std::vector &inputs, const ::caffe2::OperatorDef &op); std::vector convertConcat(const std::vector &inputs, @@ -67,16 +66,14 @@ public: const ::caffe2::OperatorDef &op); std::vector - convertFullyConnected(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors); + convertFC(const std::vector &inputs, const ::caffe2::OperatorDef &op); std::vector convertMaxPool(const std::vector &inputs, const ::caffe2::OperatorDef &op); std::vector - convertMul(const std::vector &inputs, const ::caffe2::OperatorDef &op, - const MIRTensors &mir_tensors); + convertMul(const std::vector &inputs, const ::caffe2::OperatorDef &op); std::vector convertRelu(const std::vector &inputs); @@ -94,7 +91,7 @@ public: std::vector convertSpatialBN(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors); + const ::caffe2::OperatorDef &op); std::vector convertSum(const std::vector &inputs); @@ -104,10 +101,10 @@ public: std::vector convertReshape(const std::vector &inputs, - const ::caffe2::OperatorDef &op, const MIRTensors &mir_tensors); + const ::caffe2::OperatorDef &op); private: - Graph *_graph = nullptr; + mir::Graph *_graph = nullptr; mir::Operation::Output *convertCaffeToMIR(mir::Operation::Output *arg); -- 2.7.4
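The essence of this patch is that initializer data now enters the MIR graph as ConstantOp nodes (see convertConstant) and reaches the converters through the regular _blobNameToOutput lookup, instead of being preloaded into the removed _MIRTensors map. Converters that need the raw tensor data (convertConv, convertFC, convertSpatialBN, convertReshape) dynamic_cast the producing node and reject non-constant inputs for now. The following is a minimal, self-contained sketch of that pattern in plain C++; Operation, ConstantOp and the getConstantInput helper below are simplified stand-ins for illustration only, not the real mir types or nnc API.

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Simplified stand-in for mir::Operation: a node that produces outputs.
struct Operation
{
  virtual ~Operation() = default;
  struct Output
  {
    Operation *node;                      // the operation that produces this value
    Operation *getNode() { return node; }
  };
};

// Simplified stand-in for mir::ops::ConstantOp: a node that carries tensor data.
struct ConstantOp : Operation
{
  std::vector<float> value;               // stands in for mir::TensorVariant
  const std::vector<float> &getValue() const { return value; }
};

// Mirrors the check the patch inlines in the converters: the data is found by
// inspecting the producer of the input, not by looking a blob name up in a side table.
const std::vector<float> &getConstantInput(const std::vector<Operation::Output *> &inputs,
                                           std::size_t index, const std::string &op_name)
{
  auto *constant = dynamic_cast<ConstantOp *>(inputs.at(index)->getNode());
  if (constant == nullptr)
    throw std::runtime_error(op_name + ": non-constant input is not supported yet.");
  return constant->getValue();
}

int main()
{
  ConstantOp kernel;
  kernel.value = {0.1f, 0.2f, 0.3f};
  Operation::Output kernel_out{&kernel};
  std::vector<Operation::Output *> inputs{nullptr, &kernel_out}; // inputs[0] is unused here

  const auto &weights = getConstantInput(inputs, 1, "Conv");     // succeeds: producer is constant
  return weights.size() == 3 ? 0 : 1;
}

One consequence is visible in getInputMIROps: since constants are ordinary graph outputs, an unknown blob name is now an error rather than a silently skipped input, which is what makes the importer (almost) independent of the concrete model being imported.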