From 900910812e4f2439ea0442a59ec682532b3292ae Mon Sep 17 00:00:00 2001 From: =?utf8?q?=D0=A1=D0=B5=D1=80=D0=B3=D0=B5=D0=B9=20=D0=91=D0=B0=D1=80?= =?utf8?q?=D0=B0=D0=BD=D0=BD=D0=B8=D0=BA=D0=BE=D0=B2/AI=20Tools=20Lab=20/S?= =?utf8?q?RR/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84=EC=9E=90?= Date: Fri, 7 Dec 2018 15:15:19 +0300 Subject: [PATCH] [nnc] Refactor weights conversions in Caffe importer (#2209) * Move conversion of `BlobShape`s to `TensorVariant`s from `createMIRNodesFromLayer` to individual layer conversion methods; * Remove unnecessary type alias `IrTensor`. Signed-off-by: Sergei Barannikov --- .../include/passes/caffe_frontend/caffe_importer.h | 10 -- .../nnc/passes/caffe_frontend/caffe_importer.cpp | 74 ++--------- .../nnc/passes/caffe_frontend/caffe_op_creator.cpp | 136 +++++++++++++-------- .../nnc/passes/caffe_frontend/caffe_op_creator.h | 25 ++-- 4 files changed, 105 insertions(+), 140 deletions(-) diff --git a/contrib/nnc/include/passes/caffe_frontend/caffe_importer.h b/contrib/nnc/include/passes/caffe_frontend/caffe_importer.h index c437d9d..77b3ad7 100644 --- a/contrib/nnc/include/passes/caffe_frontend/caffe_importer.h +++ b/contrib/nnc/include/passes/caffe_frontend/caffe_importer.h @@ -95,20 +95,10 @@ private: void collectUnsupportedOp(const ::caffe::LayerParameter& lp); /** - * @brief Create MIR tensor from caffe blob - */ - std::shared_ptr createTensor(const ::caffe::BlobProto&); - - /** * @brief Return MIR IODescriptors for the inputs of the given layer. */ std::vector getMIRInputsForLayer(const ::caffe::LayerParameter& layer); - /** - * @brief Prepare Caffe layer parameters for Model IR operation creator. 
- */ - std::vector> createOpParams(const ::caffe::LayerParameter&); - void processDeprecatedInput(); }; diff --git a/contrib/nnc/passes/caffe_frontend/caffe_importer.cpp b/contrib/nnc/passes/caffe_frontend/caffe_importer.cpp index 1723bf8..bfb9b30 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_importer.cpp +++ b/contrib/nnc/passes/caffe_frontend/caffe_importer.cpp @@ -81,7 +81,6 @@ void CaffeImporter::collectUnsupportedLayers() { void CaffeImporter::createMIRNodesFromLayer(const LayerParameter& layer) { auto inputs = getMIRInputsForLayer(layer); - auto params = createOpParams(layer); std::vector outputs; CaffeOpType op_type = _operatorTypes.at(layer.type()); @@ -91,10 +90,10 @@ void CaffeImporter::createMIRNodesFromLayer(const LayerParameter& layer) { outputs = _opCreator->convertInput(layer); break; case CaffeOpType::convolution: - outputs = _opCreator->convertConvolution(layer, inputs, params); + outputs = _opCreator->convertConvolution(layer, inputs); break; case CaffeOpType::innerProduct: - outputs = _opCreator->convertInnerProduct(layer, inputs, params); + outputs = _opCreator->convertInnerProduct(layer, inputs); break; case CaffeOpType::pooling: outputs = _opCreator->convertPooling(layer, inputs); @@ -112,10 +111,10 @@ void CaffeImporter::createMIRNodesFromLayer(const LayerParameter& layer) { outputs = _opCreator->convertSoftmax(layer, inputs); break; case CaffeOpType::scale: - outputs = _opCreator->convertScale(layer, inputs, params); + outputs = _opCreator->convertScale(layer, inputs); break; case CaffeOpType::batchNorm: - outputs = _opCreator->convertBatchNorm(layer, inputs, params); + outputs = _opCreator->convertBatchNorm(layer, inputs); break; case CaffeOpType::dropout: outputs = _opCreator->convertDropout(layer, inputs); @@ -124,13 +123,13 @@ void CaffeImporter::createMIRNodesFromLayer(const LayerParameter& layer) { outputs = _opCreator->convertTanH(layer, inputs); break; case CaffeOpType ::ELU: - outputs = _opCreator->convertELU(layer, inputs, 
params); + outputs = _opCreator->convertELU(layer, inputs); break; case CaffeOpType ::eltwise: outputs = _opCreator->convertEltwise(layer, inputs); break; case CaffeOpType ::deconvolution: - outputs = _opCreator->convertDeconvolution(layer, inputs, params); + outputs = _opCreator->convertDeconvolution(layer, inputs); break; case CaffeOpType::split: outputs = _opCreator->convertSplit(layer, inputs); @@ -153,7 +152,6 @@ void CaffeImporter::collectUnsupportedOp(const LayerParameter& lp) { } CaffeOpType op_type = it->second; - std::vector> params; switch (op_type) { case CaffeOpType::concat: @@ -184,8 +182,7 @@ void CaffeImporter::collectUnsupportedOp(const LayerParameter& lp) { _opCreator->checkReLU(lp.relu_param(), _problemsOpSet); break; case CaffeOpType::batchNorm: - params = createOpParams(lp); - _opCreator->checkBatchNorm(params, _problemsOpSet); + _opCreator->checkBatchNorm(lp, _problemsOpSet); break; default: _problemsOpSet.insert(lp.type() + ": unsupported layer"); @@ -198,41 +195,6 @@ void CaffeImporter::processDeprecatedInput() { throw PassException("Deprecated Caffe input types are not supported"); } -std::shared_ptr CaffeImporter::createTensor(const BlobProto& bp) { - auto type = DTYPE::FLOAT32; - size_t element_size; - - const char* src_data; - size_t buffer_size; - - if (bp.data_size() != 0) { - assert(bp.double_data_size() == 0); - element_size = sizeof(float); - buffer_size = bp.data_size() * element_size; - src_data = reinterpret_cast(bp.data().data()); - } else if (bp.double_data_size() != 0) { - element_size = sizeof(double); - buffer_size = bp.double_data_size() * element_size; - src_data = reinterpret_cast(bp.double_data().data()); - } else { - throw PassException("No data in Caffe BlobProto, investigate"); - } - - // Create untyped tensor. Note, tensor contents will be *copied* here. 
- std::shared_ptr tensor_buffer_copy(new char[buffer_size], - std::default_delete()); - - char* dst_data = tensor_buffer_copy.get(); - memcpy(dst_data, src_data, buffer_size); - - Shape tensor_shape = ShapeHelper::createShape( - bp.shape().dim(), static_cast(bp.shape().dim_size())); - - auto tensor = std::make_shared(tensor_shape, tensor_buffer_copy, type, element_size); - - return tensor; -} - std::vector CaffeImporter::getMIRInputsForLayer(const LayerParameter& layer) { std::vector inputs; @@ -242,28 +204,6 @@ std::vector CaffeImporter::getMIRInputsForLayer(const LayerPa return inputs; } -std::vector> CaffeImporter::createOpParams(const LayerParameter& lp) { - std::vector> params; - - for (const auto& blob : lp.blobs()) { - - std::shared_ptr tensor = createTensor(blob); - - if (lp.has_convolution_param() && blob.shape().dim_size() == 4) { - // TODO support non default channel axis - assert(lp.convolution_param().axis() == 1 && "assuming channel axis number set to default"); - // Input x Output x Height x Width -> Height x Width x Output x Input - params.emplace_back(transposeTensor<2, 3, 1, 0>(tensor)); - } else if (lp.has_inner_product_param() && blob.shape().dim_size() == 2) { - params.emplace_back(transposeTensor<1, 0>(tensor)); - } else { - params.push_back(tensor); - } - } - - return params; -} - void CaffeImporter::setGraphOutputs() { const auto& last_layer = _net->layer(_net->layer_size() - 1); // For now, we assume that: diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp index caff041..1cbdaa8 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp +++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.cpp @@ -73,6 +73,31 @@ mir::IODescriptor CaffeOpCreator::convertMIRToCaffe(const mir::IODescriptor& arg } } +std::shared_ptr CaffeOpCreator::convertBlob(const BlobProto& blob) { + size_t element_size; + const char* src_data; + size_t buffer_size; + + if (blob.data_size() 
!= 0) { + assert(blob.double_data_size() == 0); + element_size = sizeof(float); + buffer_size = blob.data_size() * element_size; + src_data = reinterpret_cast(blob.data().data()); + } else if (blob.double_data_size() != 0) { + element_size = sizeof(double); + buffer_size = blob.double_data_size() * element_size; + src_data = reinterpret_cast(blob.double_data().data()); + } else { + throw PassException("No data in Caffe BlobProto, investigate"); + } + + Shape shape = ShapeHelper::createShape(blob.shape().dim(), + static_cast(blob.shape().dim_size())); + std::shared_ptr data(new char[buffer_size], std::default_delete()); + std::memcpy(data.get(), src_data, buffer_size); + return std::make_shared(shape, data, DTYPE::FLOAT32, element_size); +} + std::vector CaffeOpCreator::convertInput(const LayerParameter& layer) { const auto& params = layer.input_param(); @@ -144,6 +169,9 @@ void CaffeOpCreator::checkConvolution(const ConvolutionParameter& opts, std::set& problems_op_set) { assert(opts.stride_size() <= 2); + if (opts.axis() != 1) + problems_op_set.insert("Conv2D: Unsupported axis"); + if (opts.pad_size() != 0 && (opts.has_pad_h() || opts.has_pad_w())) problems_op_set.insert("Conv2D: Conflicting padding properties"); @@ -153,39 +181,42 @@ void CaffeOpCreator::checkConvolution(const ConvolutionParameter& opts, std::vector CaffeOpCreator::convertConvolution(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params) { + const std::vector& inputs) { auto& opts = layer.convolution_param(); Shape strides; std::vector padding; convertConvolutionParam(opts, strides, padding); - std::shared_ptr unfolded_tensor = params[0]; + assert(layer.blobs(0).shape().dim_size() == 4); + auto kernel_weights = convertBlob(layer.blobs(0)); + kernel_weights = transposeTensor<2, 3, 1, 0>(kernel_weights); + Operation* conv2d; - auto in_group_size = params[0]->getShape().dim(2); - auto out_channels = params[0]->getShape().dim(3); + auto in_group_size = 
kernel_weights->getShape().dim(2); + auto out_channels = kernel_weights->getShape().dim(3); int32_t num_groups = opts.group(); bool is_depthwise = (num_groups != 1) && (in_group_size == 1) && (out_channels == num_groups); if (is_depthwise) { // This is depthwise convolution // TODO handle properly kernel with layer multiplier - std::shared_ptr transposed_tensor = mir::transposeTensor<0, 1, 3, 2>(params[0]); + auto transposed_tensor = transposeTensor<0, 1, 3, 2>(kernel_weights); conv2d = createOp(layer.name(), convertCaffeToMIR(inputs[0]), *transposed_tensor, strides, padding, padding); } else { if (num_groups != 1) { // first we need to convert kernel of grouped convolution to appropriate ordinary kernel - unfolded_tensor = fixGroupedKernel(opts.group(), params[0]); + kernel_weights = fixGroupedKernel(opts.group(), kernel_weights); } - conv2d = createOp(layer.name(), convertCaffeToMIR(inputs[0]), *unfolded_tensor, + conv2d = createOp(layer.name(), convertCaffeToMIR(inputs[0]), *kernel_weights, strides, padding, padding); } - // bias_term is optional (so might not be present) and defaults to true - if (!opts.has_bias_term() || opts.bias_term()) { + // Add the bias, if any. 
+ if (opts.bias_term()) { + auto bias_weights = convertBlob(layer.blobs(1)); auto bias_add = createOp(layer.name() + ".bias", conv2d->getOutput(0), - *params[1]); + *bias_weights); return {convertMIRToCaffe(bias_add->getOutput(0))}; } else { return {convertMIRToCaffe(conv2d->getOutput(0))}; @@ -194,26 +225,28 @@ CaffeOpCreator::convertConvolution(const caffe::LayerParameter& layer, std::vector CaffeOpCreator::convertDeconvolution(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params) { + const std::vector& inputs) { auto& opts = layer.convolution_param(); Shape strides; std::vector padding; convertConvolutionParam(opts, strides, padding); - std::shared_ptr unfolded_tensor = params[0]; + auto kernel_weights = convertBlob(layer.blobs(0)); + kernel_weights = transposeTensor<2, 3, 1, 0>(kernel_weights); + if (opts.group() != 1) { // first we need to convert kernel of grouped convolution to appropriate ordinary kernel - unfolded_tensor = fixGroupedKernel(opts.group(), params[0]); + kernel_weights = fixGroupedKernel(opts.group(), kernel_weights); } auto deconv2d = createOp(layer.name(), convertCaffeToMIR(inputs[0]), - *unfolded_tensor, strides, padding); + *kernel_weights, strides, padding); // bias_term is optional (so might not be present) and defaults to true if (!opts.has_bias_term() || opts.bias_term()) { + auto bias_weights = convertBlob(layer.blobs(1)); auto bias_add = createOp(layer.name() + ".bias", deconv2d->getOutput(0), - *params[1]); + *bias_weights); return {convertMIRToCaffe(bias_add->getOutput(0))}; } else { return {convertMIRToCaffe(deconv2d->getOutput(0))}; @@ -239,24 +272,27 @@ void CaffeOpCreator::checkInnerProduct(const InnerProductParameter& opts, */ std::vector CaffeOpCreator::convertInnerProduct(const LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params) { + const std::vector& inputs) { auto& opts = layer.inner_product_param(); + auto weights = convertBlob(layer.blobs(0)); + 
weights = transposeTensor<1, 0>(weights); + // Add Reshape operation to make sure the input for FC operation has shape [1, fcInputSize] // It is needed because Caffe InnerProduct layer takes NCHW input and flattens the CHW part. int32_t fc_input_size = static_cast( - params[0]->getShape().numElements()) / opts.num_output(); + weights->getShape().numElements()) / opts.num_output(); auto reshape = createOp(layer.name() + ".reshape", inputs[0], Shape{1, fc_input_size}); auto fully_connected = createOp(layer.name() + ".fc", - reshape->getOutput(0), *params[0]); - - // bias_term is optional (so might not be present) and defaults to true - if (!opts.has_bias_term() || opts.bias_term()) { - auto add_op = createOp(layer.name() + ".bias", fully_connected->getOutput(0), - *params[1]); - return {add_op->getOutput(0)}; + reshape->getOutput(0), *weights); + + // Add the bias, if any. + if (opts.bias_term()) { + auto bias_weights = convertBlob(layer.blobs(1)); + auto bias_add = createOp(layer.name() + ".bias", fully_connected->getOutput(0), + *bias_weights); + return {bias_add->getOutput(0)}; } else { return {fully_connected->getOutput(0)}; } @@ -440,35 +476,39 @@ CaffeOpCreator::convertReLU(const caffe::LayerParameter& layer, std::vector CaffeOpCreator::convertScale(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params) { + const std::vector& inputs) { auto& opts = layer.scale_param(); - auto scale = createOp(layer.name(), convertCaffeToMIR(inputs[0]), *params[0]); + auto scale_weights = convertBlob(layer.blobs(0)); + auto scale = createOp(layer.name(), convertCaffeToMIR(inputs[0]), *scale_weights); - // bias_term is optional (so might not be present) and defaults to true - if (!opts.has_bias_term() || opts.bias_term()) { + // Add the bias, if any. 
+ if (opts.bias_term()) { + auto bias_weights = convertBlob(layer.blobs(1)); auto bias_add = createOp(layer.name() + ".bias", scale->getOutput(0), - *params[1]); + *bias_weights); return {convertMIRToCaffe(bias_add->getOutput(0))}; } else { return {convertMIRToCaffe(scale->getOutput(0))}; } } -void CaffeOpCreator::checkBatchNorm(const std::vector>& params, +void CaffeOpCreator::checkBatchNorm(const caffe::LayerParameter& layer, std::set& problems_op_set) { + const auto& scale_shape = layer.blobs(2).shape(); + // Check that last blob(with scaleFactor) containing only one number - if (params[2]->getShape().rank() != 1 && params[2]->getShape().dim(0) != 1) + // FIXME This should be an assertion. + if (scale_shape.dim_size() != 1 || scale_shape.dim(0) != 1) problems_op_set.insert("Unexpected shape of scale parameter in batch norm"); } std::vector CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params) { + const std::vector& inputs) { auto& opts = layer.batch_norm_param(); float eps = opts.eps(); - float scale_factor = *reinterpret_cast(params[2]->at(mir::Index{0})); + auto scale_weight = convertBlob(layer.blobs(2)); + float scale_factor = *reinterpret_cast(scale_weight->at(mir::Index{0})); // Code below is taken from cpu caffe implementation: // https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_norm_layer.cpp#L100 if (scale_factor != 0.0f) @@ -477,21 +517,22 @@ CaffeOpCreator::convertBatchNorm(const caffe::LayerParameter& layer, // create bias argument from mean: // multiply elements of mean by scaleFactor and get opposite numbers // to subtract mean from input via biasAdd operation - Tensor bias_data(*params[0]); - - for (Index idx: ShapeRange(bias_data.getShape())) + auto mean_weights = convertBlob(layer.blobs(0)); + Tensor bias_data(*mean_weights); + for (Index idx : ShapeRange(bias_data.getShape())) bias_data.at(idx) *= -scale_factor; auto bias_add = createOp(layer.name() + 
".bias", convertCaffeToMIR(inputs[0]), - *params[0]); + *mean_weights); // create scale argument from variance: // multiply elements of variance by scaleFactor and // normalize biased input using scale operation - Tensor scale_data(*params[1]); - for (Index idx: ShapeRange(scale_data.getShape())) + auto variance_weights = convertBlob(layer.blobs(1)); + Tensor scale_data(*variance_weights); + for (Index idx : ShapeRange(scale_data.getShape())) scale_data.at(idx) = 1.0f / std::sqrt(scale_data.at(idx) * scale_factor + eps); - auto scale = createOp(layer.name() + ".scale", bias_add->getOutput(0), *params[1]); - + auto scale = createOp(layer.name() + ".scale", bias_add->getOutput(0), + *variance_weights); return {convertMIRToCaffe(scale->getOutput(0))}; } @@ -505,8 +546,7 @@ CaffeOpCreator::convertDropout(const caffe::LayerParameter& layer, std::vector CaffeOpCreator::convertELU(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params) { + const std::vector& inputs) { auto& opts = layer.elu_param(); auto elu = createOp(layer.name(), inputs[0], opts.alpha()); return {elu->getOutput(0)}; diff --git a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h index 63da772..728ea76 100644 --- a/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h +++ b/contrib/nnc/passes/caffe_frontend/caffe_op_creator.h @@ -31,7 +31,7 @@ namespace nnc { -using IrTensor = nnc::mir::TensorVariant; +using nnc::mir::TensorVariant; class CaffeOpCreator { public: @@ -42,13 +42,11 @@ public: std::vector convertConvolution(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params); + const std::vector& inputs); std::vector convertInnerProduct(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params); + const std::vector& inputs); std::vector convertConcat(const caffe::LayerParameter& layer, @@ -72,13 +70,11 @@ public: std::vector 
convertScale(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params); + const std::vector& inputs); std::vector convertBatchNorm(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params); + const std::vector& inputs); std::vector convertDropout(const caffe::LayerParameter& layer, @@ -86,13 +82,11 @@ public: std::vector convertDeconvolution(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params); + const std::vector& inputs); std::vector convertELU(const caffe::LayerParameter& layer, - const std::vector& inputs, - const std::vector>& params); + const std::vector& inputs); std::vector convertTanH(const caffe::LayerParameter& layer, @@ -116,8 +110,7 @@ public: void checkReLU(const caffe::ReLUParameter& opts, std::set&); - void checkBatchNorm(const std::vector>& params, - std::set&); + void checkBatchNorm(const caffe::LayerParameter& layer, std::set&); private: mir::Graph* _graph = nullptr; @@ -126,6 +119,8 @@ private: mir::IODescriptor convertMIRToCaffe(const mir::IODescriptor& arg); + std::shared_ptr convertBlob(const caffe::BlobProto& blob); + template mir::Operation* createOp(const std::string& name, Types&&... args); }; -- 2.7.4