From 7dc6b1d7d4bb2f7930e36deba7ef1cbaee9591b7 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Mon, 28 Aug 2017 17:37:09 +0300
Subject: [PATCH] Layers for OpenFace face recognition network

---
 modules/dnn/include/opencv2/dnn/all_layers.hpp |  16 +++
 modules/dnn/src/dnn.cpp                        |   2 +-
 modules/dnn/src/init.cpp                       |   1 +
 modules/dnn/src/layers/concat_layer.cpp        |  39 +++++--
 modules/dnn/src/layers/convolution_layer.cpp   |   2 +-
 modules/dnn/src/layers/lp_normalize_layer.cpp  |  78 +++++++++++++
 modules/dnn/src/layers/pooling_layer.cpp       |  11 +-
 modules/dnn/src/layers/reshape_layer.cpp       |  17 ++-
 modules/dnn/src/torch/torch_importer.cpp       | 149 +++++++++++++++++++++++--
 modules/dnn/test/test_torch_importer.cpp       |  47 +++++++-
 10 files changed, 328 insertions(+), 34 deletions(-)
 create mode 100644 modules/dnn/src/layers/lp_normalize_layer.cpp

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 333656a..ef61efd 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -245,6 +245,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         bool globalPooling;
         bool computeMaxIdx;
         String padMode;
+        bool ceilMode;
 
         static Ptr<PoolingLayer> create(const LayerParams& params);
     };
@@ -257,6 +258,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr<SoftmaxLayer> create(const LayerParams& params);
     };
 
+    class CV_EXPORTS LPNormalizeLayer : public Layer
+    {
+    public:
+        float pnorm, epsilon;
+
+        static Ptr<LPNormalizeLayer> create(const LayerParams& params);
+    };
+
     class CV_EXPORTS InnerProductLayer : public Layer
     {
     public:
@@ -294,6 +303,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         int axis;
+        /**
+         * @brief Add zero padding in case of concatenation of blobs with different
+         * spatial sizes.
+         *
+         * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
+         */
+        bool padding;
 
         static Ptr<ConcatLayer> create(const LayerParams &params);
     };
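Note (not part of the diff): the LPNormalizeLayer declared above scales the
whole input blob by the reciprocal of its L_p norm, y = x / (||x||_p + eps),
where ||x||_p = (sum_i |x_i|^p)^(1/p). Below is a minimal standalone sketch of
driving the layer through the factory once this patch is applied; the
registration name "LPNormalize" and the "p"/"eps" parameters come from this
patch, while the input values and the direct-factory usage are illustrative
assumptions:

    #include <opencv2/dnn.hpp>
    #include <vector>
    #include <cstdio>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        LayerParams lp;
        lp.set("p", 2);        // L2 normalization, as used by OpenFace
        lp.set("eps", 1e-10f); // guards against division by zero

        Ptr<Layer> layer = LayerFactory::createLayerInstance("LPNormalize", lp);

        Mat input = Mat::ones(1, 16, CV_32F);  // L2 norm is sqrt(16) = 4
        std::vector<Mat*> inputs(1, &input);
        std::vector<Mat> outputs(1, Mat(1, 16, CV_32F)), internals;
        layer->forward(inputs, outputs, internals);
        std::printf("%f\n", outputs[0].at<float>(0));  // prints 0.25
        return 0;
    }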
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index e4671ec..3714fa4 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1177,7 +1177,7 @@ struct Net::Impl
             // (and so we eliminate the concatenation layer, because the channels
             // are concatenated implicitly).
             Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
-            if( !concatLayer.empty() && concatLayer->axis == 1 &&
+            if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
                 ld.outputBlobs.size() == 1 )
             {
                 Mat& output = ld.outputBlobs[0];
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index 32ff69e..303e05f 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -91,6 +91,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(InnerProduct,   InnerProductLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Softmax,        SoftmaxLayer);
     CV_DNN_REGISTER_LAYER_CLASS(MVN,            MVNLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(LPNormalize,    LPNormalizeLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(ReLU,           ReLULayer);
     CV_DNN_REGISTER_LAYER_CLASS(ChannelsPReLU,  ChannelsPReLULayer);
diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
index 662be1d..7696528 100644
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@@ -56,6 +56,7 @@ public:
     {
         setParamsFrom(params);
         axis = params.get<int>("axis", 1);
+        padding = params.get<bool>("padding", false);
     }
 
     virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -64,8 +65,7 @@ public:
                                  std::vector<MatShape> &internals) const
     {
         CV_Assert(inputs.size() > 0);
-        outputs.clear();
-        outputs.push_back(inputs[0]);
+        outputs.resize(1, inputs[0]);
         int cAxis = clamp(axis, inputs[0]);
 
         int axisSum = 0;
@@ -73,25 +73,33 @@ public:
         {
             MatShape curShape = inputs[i];
 
-            CV_Assert(curShape.size() == outputs.back().size());
-            for (int curAxis = 0; curAxis < outputs.back().size(); curAxis++)
+            if (padding)
             {
-                if (curAxis != cAxis && outputs.back()[curAxis] != curShape[curAxis])
-                    CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
+                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
+                {
+                    outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
+                }
+            }
+            else
+            {
+                CV_Assert(curShape.size() == outputs[0].size());
+                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
+                {
+                    if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
+                        CV_Error(Error::StsBadSize, "Inconsistent shape for ConcatLayer");
+                }
             }
 
             axisSum += curShape[cAxis];
         }
-
-        outputs.back()[cAxis] = axisSum;
-
+        outputs[0][cAxis] = axisSum;
         return false;
     }
 
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
-               backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1;  // By channels
+               backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding;  // By channels
     }
 
     class ChannelConcatInvoker : public ParallelLoopBody
@@ -174,7 +182,10 @@ public:
         int cAxis = clamp(axis, inputs[0]->dims);
         Mat& outMat = outputs[0];
 
-        if( cAxis == 1 && outMat.dims == 4 )
+        if (padding)
+            outMat.setTo(0);
+
+        if( cAxis == 1 && outMat.dims == 4 && !padding)
         {
             int nstripes = getNumThreads();
             ChannelConcatInvoker::run(inputs, outMat, nstripes);
@@ -187,6 +198,12 @@ public:
             for (size_t i = 0; i < inputs.size(); i++)
             {
                 ranges[cAxis].end = ranges[cAxis].start + inputs[i]->size[cAxis];
+                for (int j = 0; j < outMat.dims; ++j)
+                {
+                    if (j == cAxis) continue;
+                    ranges[j].start = (outMat.size[j] - inputs[i]->size[j]) / 2;
+                    ranges[j].end = ranges[j].start + inputs[i]->size[j];
+                }
                 inputs[i]->copyTo(outMat(&ranges[0]));
                 ranges[cAxis].start = ranges[cAxis].end;
             }
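Note (not part of the diff): with padding enabled, getMemoryShapes() takes an
element-wise maximum over the input shapes, and forward() centers each input
inside the zero-filled output, matching Torch's DepthConcat. For example
(shapes are illustrative), concatenating 1x3x5x5 and 1x2x3x3 blobs along the
channel axis yields a 1x5x5x5 output, with the 3x3 maps placed at offset
(5 - 3) / 2 = 1 on both spatial axes. A standalone sketch of the same range
arithmetic used in forward() above:

    #include <cstdio>

    int main()
    {
        const int outSize = 5, inSize = 3;
        // Centered placement of the smaller input inside the padded output.
        int start = (outSize - inSize) / 2;
        int end = start + inSize;
        std::printf("occupies rows [%d, %d) of %d\n", start, end, outSize);
        return 0;
    }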
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 4bf829c..a0b8b7a 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -187,7 +187,7 @@ public:
         }
 
         int ngroups = inpCn / blobs[0].size[1];
-        CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
+        CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);
 
         int dims[] = {inputs[0][0], outCn, out.height, out.width};
         outputs.resize(inputs.size(), shape(dims));
diff --git a/modules/dnn/src/layers/lp_normalize_layer.cpp b/modules/dnn/src/layers/lp_normalize_layer.cpp
new file mode 100644
index 0000000..9efb9b8
--- /dev/null
+++ b/modules/dnn/src/layers/lp_normalize_layer.cpp
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include <opencv2/dnn/shape_utils.hpp>
+
+namespace cv { namespace dnn {
+
+class LPNormalizeLayerImpl : public LPNormalizeLayer
+{
+public:
+
+    LPNormalizeLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        pnorm = params.get<float>("p", 2);
+        epsilon = params.get<float>("eps", 1e-10f);
+        CV_Assert(pnorm > 0);
+    }
+
+    bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                         const int requiredOutputs,
+                         std::vector<MatShape> &outputs,
+                         std::vector<MatShape> &internals) const
+    {
+        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
+        if (pnorm != 1 && pnorm != 2)
+        {
+            internals.resize(1, inputs[0]);
+        }
+        return true;
+    }
+
+    virtual bool supportBackend(int backendId)
+    {
+        return backendId == DNN_BACKEND_DEFAULT;
+    }
+
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        CV_Assert(inputs[0]->total() == outputs[0].total());
+        float norm;
+        if (pnorm == 1)
+            norm = cv::norm(*inputs[0], NORM_L1);
+        else if (pnorm == 2)
+            norm = cv::norm(*inputs[0], NORM_L2);
+        else
+        {
+            pow(abs(*inputs[0]), pnorm, internals[0]);
+            norm = pow(sum(internals[0])[0], 1.0f / pnorm);
+        }
+        multiply(*inputs[0], 1.0f / (norm + epsilon), outputs[0]);
+    }
+
+    int64 getFLOPS(const std::vector<MatShape> &inputs,
+                   const std::vector<MatShape> &) const
+    {
+        int64 flops = 0;
+        for (int i = 0; i < inputs.size(); i++)
+            flops += 3 * total(inputs[i]);
+        return flops;
+    }
+};
+
+Ptr<LPNormalizeLayer> LPNormalizeLayer::create(const LayerParams& params)
+{
+    return Ptr<LPNormalizeLayer>(new LPNormalizeLayerImpl(params));
+}
+
+}  // namespace dnn
+}  // namespace cv
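Note (not part of the diff): for p other than 1 or 2, forward() materializes
|x|^p in the internal buffer and reduces it with cv::sum before taking the
1/p-th power. A standalone check of that formula with plain OpenCV calls
(input values chosen by hand):

    #include <opencv2/core.hpp>
    #include <cmath>
    #include <cstdio>

    int main()
    {
        cv::Mat x = (cv::Mat_<float>(1, 2) << 3.f, 4.f);
        const float p = 3.f;
        cv::Mat powered;
        cv::pow(cv::abs(x), p, powered);  // |x|^p = {27, 64}
        float norm = std::pow((float)cv::sum(powered)[0], 1.f / p);
        std::printf("L3 norm = %f\n", norm);  // 91^(1/3) ~= 4.498
        return 0;
    }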
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index ce99552..e9fc1d6 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -54,7 +54,6 @@ namespace cv
 namespace dnn
 {
 
-//TODO: add ceil_mode param
 class PoolingLayerImpl : public PoolingLayer
 {
 public:
@@ -79,6 +78,7 @@ public:
         getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
                                pad.height, pad.width, stride.height, stride.width, padMode);
         setParamsFrom(params);
+        ceilMode = params.get<bool>("ceil_mode", true);
     }
 
     void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
@@ -572,11 +572,10 @@ public:
         }
         else if (padMode.empty())
         {
-            //Yeah, something strange Caffe scheme-)
-            out.height = static_cast<int>(ceil(static_cast<float>(in.height + 2 * pad.height -
-                                                                  kernel.height) / stride.height)) + 1;
-            out.width = static_cast<int>(ceil(static_cast<float>(in.width + 2 * pad.width -
-                                                                 kernel.width) / stride.width)) + 1;
+            float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height;
+            float width = (float)(in.width + 2 * pad.width - kernel.width) / stride.width;
+            out.height = 1 + (ceilMode ? ceil(height) : floor(height));
+            out.width = 1 + (ceilMode ? ceil(width) : floor(width));
 
             if (pad.height || pad.width)
             {
diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp
index e5212fb..802e9ee 100644
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@@ -75,12 +75,21 @@ static void computeShapeByReshapeMask(const MatShape &srcShape,
     if (explicitMask)
     {
         int maskTotal = total(maskShape);
-        for (int i = srcRange.start + 1; i < srcRange.end; ++i)
+        // Go from the end of the mask until we collect the required total.
+        bool matched = false;
+        for (int i = srcRange.end - 1; i >= srcRange.start; --i)
         {
-            if (total(srcShape, i, srcRange.end) != maskTotal)
+            if (matched)
             {
-                srcRange.start = i - 1;
-                break;
+                if (i == 0 || total(srcShape, i, srcRange.end) != maskTotal)
+                {
+                    srcRange.start = i + 1;
+                    break;
+                }
+            }
+            else
+            {
+                matched = total(srcShape, i, srcRange.end) == maskTotal;
+            }
         }
         CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
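Note (not part of the diff): the rewritten pooling size computation matters
because Caffe rounds the number of pooling windows up while Torch rounds it
down; the new ceil_mode flag (default true, preserving the old Caffe
behaviour) selects between the two. Worked example with the formula above,
in = 6, kernel = 3, pad = 0, stride = 2: (6 - 3) / 2 = 1.5, so ceil mode
gives 1 + 2 = 3 outputs and floor mode gives 1 + 1 = 2. A standalone sketch:

    #include <cmath>
    #include <cstdio>

    // Mirrors the out.height/out.width computation in pooling_layer.cpp.
    static int poolOut(int in, int kernel, int pad, int stride, bool ceilMode)
    {
        float r = (float)(in + 2 * pad - kernel) / stride;
        return 1 + (int)(ceilMode ? std::ceil(r) : std::floor(r));
    }

    int main()
    {
        std::printf("ceil: %d, floor: %d\n",
                    poolOut(6, 3, 0, 2, true),    // 3
                    poolOut(6, 3, 0, 2, false));  // 2
        return 0;
    }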
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 44fcd8c..925de05 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -309,6 +309,7 @@ struct TorchImporter : public ::cv::dnn::Importer
             if (vtype == TYPE_TORCH)
             {
                 int index = readInt();
+                int numModules = curModule->modules.size();
                 readTorchObject(index);
 
                 if (tensors.count(index)) //tensor was readed
@@ -324,6 +325,14 @@ struct TorchImporter : public ::cv::dnn::Importer
                     DictValue scalar = DictValue::arrayReal(matCasted.ptr<double>(), matCasted.total());
                     scalarParams.set(key, scalar);
                 }
+                else
+                {
+                    // Only tensors and scalars are supported for table fields.
+                    // i.e. nn.Inception has field `transfer` which is an
+                    // activation layer. So we remove modules that were added
+                    // by readTorchObject(index).
+                    while (curModule->modules.size() > numModules)
+                        curModule->modules.pop_back();
+                }
             }
             else if (vtype == TYPE_NUMBER)
             {
@@ -469,7 +478,8 @@ struct TorchImporter : public ::cv::dnn::Importer
            layerParams.set("torch_index", index);
 
            if (nnName == "Sequential" || nnName == "Parallel" ||
-               nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable")
+               nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable" ||
+               nnName == "DepthConcat" || nnName == "Inception")
            {
                Module *parentModule = curModule;
                curModule->modules.push_back(newModule);
@@ -490,8 +500,12 @@ struct TorchImporter : public ::cv::dnn::Importer
                {
                    layerParams.set("dimension", scalarParams.get<int>("dimension"));
                }
+               if (nnName == "DepthConcat")
+               {
+                   layerParams.set("dimension", scalarParams.get<int>("dimension"));
+               }
            }
-           else if (nnName == "SpatialConvolution")
+           else if (nnName == "SpatialConvolution" || nnName == "SpatialConvolutionMM")
            {
                newModule->apiType = "Convolution";
                readTorchTable(scalarParams, tensorParams);
@@ -507,8 +521,37 @@ struct TorchImporter : public ::cv::dnn::Importer
                layerParams.set("num_output", scalarParams.get<int>("nOutputPlane"));
                convertTorchKernelsParams(scalarParams, layerParams);
 
+               if (nnName == "SpatialConvolutionMM")
+               {
+                   // Reshape weights from a [ outCh x inCh*kH*kW ] 2D matrix
+                   // into a 4D [ outCh x inCh x kH x kW ] blob.
+                   CV_Assert(layerParams.blobs[0].dims == 2);
+                   const int kernel = layerParams.blobs[0].size[1];  // inCh * kH * kW
+                   MatShape kernelShape(4);
+                   kernelShape[0] = layerParams.blobs[0].size[0];  // outCh.
+                   kernelShape[2] = layerParams.get<int>("kernel_h");
+                   kernelShape[3] = layerParams.get<int>("kernel_w");
+                   kernelShape[1] = kernel / (kernelShape[2] * kernelShape[3]);  // inCh.
+                   layerParams.blobs[0] = layerParams.blobs[0].reshape(1, kernelShape);
+               }
                curModule->modules.push_back(newModule);
            }
+           else if (nnName == "SpatialLPPooling")
+           {
+               // nn.Sequential {
+               //     [input -> (1) -> (2) -> output]
+               //     (1): nn.Sequential {
+               //         [input -> (1) -> (2) -> (3) -> (4) -> output]
+               //         (1): nn.Power
+               //         (2): nn.SpatialAveragePooling(...)
+               //         (3): nn.MulConstant
+               //         (4): nn.Power
+               //     }
+               //     (2): nn.Sigmoid
+               // }
+               // nn.SpatialLPPooling is just a table so we skip it.
+               readTorchTable(scalarParams, tensorParams);
+           }
            else if (nnName == "SpatialMaxPooling" || nnName == "SpatialAveragePooling")
            {
                newModule->apiType = "Pooling";
                readTorchTable(scalarParams, tensorParams);
@@ -522,6 +565,9 @@ struct TorchImporter : public ::cv::dnn::Importer
                    layerParams.set("pool", "AVE");
                convertTorchKernelsParams(scalarParams, layerParams);
 
+               CV_Assert(scalarParams.has("ceil_mode"));
+               layerParams.set("ceil_mode", scalarParams.get<bool>("ceil_mode"));
+
                curModule->modules.push_back(newModule);
            }
            else if (nnName == "Linear")
@@ -541,7 +587,7 @@ struct TorchImporter : public ::cv::dnn::Importer
                layerParams.set("num_output", weightBlob.size[0]);
                curModule->modules.push_back(newModule);
            }
-           else if (nnName == "Reshape")
+           else if (nnName == "Reshape" || nnName == "View")
            {
                newModule->apiType = "Reshape";
 
@@ -576,15 +622,24 @@ struct TorchImporter : public ::cv::dnn::Importer
                newModule->apiType = "BatchNorm";
                readTorchTable(scalarParams, tensorParams);
 
-               CV_Assert(tensorParams.count("running_var") &&
-                         tensorParams.count("running_mean"));
-               layerParams.blobs.push_back(tensorParams["running_mean"].second);
-               layerParams.blobs.push_back(tensorParams["running_var"].second);
-
                CV_Assert(scalarParams.has("eps"));
                float eps = float(scalarParams.get<double>("eps"));
                layerParams.set("eps", eps);
 
+               CV_Assert((tensorParams.count("running_var") || tensorParams.count("running_std")) &&
+                         tensorParams.count("running_mean"));
+               layerParams.blobs.push_back(tensorParams["running_mean"].second);
+               if (tensorParams.count("running_var"))
+               {
+                   layerParams.blobs.push_back(tensorParams["running_var"].second);
+               }
+               else
+               {
+                   layerParams.blobs.push_back(tensorParams["running_std"].second);
+                   pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
+                   subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
+               }
+
                if (tensorParams.count("weight"))
                {
                    layerParams.set("has_weight", true);
@@ -642,6 +697,18 @@ struct TorchImporter : public ::cv::dnn::Importer
                newModule->apiType = "Identity";
                curModule->modules.push_back(newModule);
            }
+           else if (nnName == "Normalize")
+           {
+               readTorchTable(scalarParams, tensorParams);
+               CV_Assert(scalarParams.has("p"));
+
+               layerParams.set("p", scalarParams.get<float>("p"));
+               if (scalarParams.has("eps"))
+                   layerParams.set("eps", scalarParams.get<float>("eps"));
+
+               newModule->apiType = "LPNormalize";
+               curModule->modules.push_back(newModule);
+           }
            else if (nnName == "Padding")
            {
                readTorchTable(scalarParams, tensorParams);
@@ -760,6 +827,46 @@ struct TorchImporter : public ::cv::dnn::Importer
                layerParams.set("log_softmax", true);
                curModule->modules.push_back(newModule);
            }
+           else if (nnName == "SpatialCrossMapLRN")
+           {
+               newModule->apiType = "LRN";
+               readTorchTable(scalarParams, tensorParams);
+
+               CV_Assert(scalarParams.has("alpha"));
+               CV_Assert(scalarParams.has("beta"));
+               CV_Assert(scalarParams.has("k"));
+               CV_Assert(scalarParams.has("size"));
+
+               layerParams.set("norm_region", "ACROSS_CHANNELS");
+               layerParams.set("alpha", scalarParams.get<float>("alpha"));
+               layerParams.set("beta", scalarParams.get<float>("beta"));
+               layerParams.set("bias", scalarParams.get<float>("k"));
+               layerParams.set("local_size", scalarParams.get<int>("size"));
+               layerParams.set("norm_by_size", true);
+
+               curModule->modules.push_back(newModule);
+           }
+           else if (nnName == "Square" || nnName == "Sqrt" || nnName == "Power")
+           {
+               readTorchTable(scalarParams, tensorParams);
+
+               float power;
+               if (nnName == "Square") power = 2.0f;
+               else if (nnName == "Sqrt") power = 0.5f;
+               else if (nnName == "Power") power = scalarParams.get<float>("pow", 1.0f);
+
+               newModule->apiType = "Power";
+               layerParams.set("power", power);
+               curModule->modules.push_back(newModule);
+           }
+           else if (nnName == "MulConstant")
+           {
+               readTorchTable(scalarParams, tensorParams);
+               CV_Assert(scalarParams.has("constant_scalar"));
+               newModule->apiType = "Power";
+               layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
+               curModule->modules.push_back(newModule);
+           }
            else
            {
                CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
@@ -816,7 +923,7 @@ struct TorchImporter : public ::cv::dnn::Importer
        }
        else
        {
-           if (module->thName == "Sequential")
+           if (module->thName == "Sequential" || module->thName == "Inception")
            {
                for (size_t i = 0; i < module->modules.size(); i++)
                {
@@ -851,6 +958,30 @@ struct TorchImporter : public ::cv::dnn::Importer
                addedModules.push_back(std::make_pair(mergeId, module));
                return mergeId;
            }
+           else if (module->thName == "DepthConcat")
+           {
+               int newId, mergeId;
+               LayerParams mergeParams;
+               mergeParams.set("axis", module->params.get<int>("dimension") - 1);
+               mergeParams.set("padding", true);
+
+               std::vector<int> branchIds;
+               for (int i = 0; i < (int)module->modules.size(); i++)
+               {
+                   newId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum);
+                   branchIds.push_back(newId);
+               }
+
+               mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
+
+               for (int i = 0; i < branchIds.size(); i++)
+               {
+                   net.connect(branchIds[i], 0, mergeId, i);
+               }
+
+               addedModules.push_back(std::make_pair(mergeId, module));
+               return mergeId;
+           }
            else if (module->thName == "Parallel")
            {
                int newId, splitId, mergeId, reshapeId;
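Note (not part of the diff): the BatchNorm change handles older Torch
checkpoints that store running_std = 1 / sqrt(var + eps) instead of
running_var; the importer recovers the variance as running_std^(-2) - eps,
which is exactly what the pow()/subtract() pair above computes. A standalone
check with hand-picked numbers:

    #include <opencv2/core.hpp>
    #include <cmath>
    #include <cstdio>

    int main()
    {
        const float var = 4.f, eps = 1e-5f;
        cv::Mat runningStd = (cv::Mat_<float>(1, 1) << 1.f / std::sqrt(var + eps));

        cv::Mat recovered;
        cv::pow(runningStd, -2, recovered);       // var + eps
        cv::subtract(recovered, eps, recovered);  // var
        std::printf("recovered var = %f\n", recovered.at<float>(0));  // ~4.0
        return 0;
    }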
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index 5fc3c07..c46f3a9 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -56,11 +56,11 @@ using namespace cv::dnn;
 template<typename TStr>
 static std::string _tf(TStr filename, bool inTorchDir = true)
 {
-    String path = getOpenCVExtraDir() + "/dnn/";
+    String path = "dnn/";
     if (inTorchDir)
         path += "torch/";
     path += filename;
-    return path;
+    return findDataFile(path, false);
 }
 
 TEST(Torch_Importer, simple_read)
@@ -123,6 +123,7 @@ TEST(Torch_Importer, run_reshape)
     runTorchNet("net_reshape");
     runTorchNet("net_reshape_batch");
     runTorchNet("net_reshape_single_sample");
+    runTorchNet("net_reshape_channels", "", false, true);
 }
 
 TEST(Torch_Importer, run_linear)
@@ -138,6 +139,7 @@ TEST(Torch_Importer, run_paralel)
 TEST(Torch_Importer, run_concat)
 {
     runTorchNet("net_concat", "l5_torchMerge");
+    runTorchNet("net_depth_concat", "", false, true);
 }
 
 TEST(Torch_Importer, run_deconv)
@@ -172,6 +174,27 @@ TEST(Torch_Importer, net_logsoftmax)
     runTorchNet("net_logsoftmax_spatial");
 }
 
+TEST(Torch_Importer, net_lp_pooling)
+{
+    runTorchNet("net_lp_pooling_square", "", false, true);
+    runTorchNet("net_lp_pooling_power", "", false, true);
+}
+
+TEST(Torch_Importer, net_conv_gemm_lrn)
+{
+    runTorchNet("net_conv_gemm_lrn", "", false, true);
+}
+
+TEST(Torch_Importer, net_inception_block)
+{
+    runTorchNet("net_inception_block", "", false, true);
+}
+
+TEST(Torch_Importer, net_normalize)
+{
+    runTorchNet("net_normalize", "", false, true);
+}
+
 TEST(Torch_Importer, ENet_accuracy)
 {
     Net net;
@@ -202,6 +225,26 @@ TEST(Torch_Importer, ENet_accuracy)
     }
 }
 
+TEST(Torch_Importer, OpenFace_accuracy)
+{
+    const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
+    Net net = readNetFromTorch(model);
+
+    Mat sample = imread(findDataFile("cv/shared/lena.png", false));
+    Mat sampleF32(sample.size(), CV_32FC3);
+    sample.convertTo(sampleF32, sampleF32.type());
+    sampleF32 /= 255;
+    resize(sampleF32, sampleF32, Size(96, 96), 0, 0, INTER_NEAREST);
+
+    Mat inputBlob = blobFromImage(sampleF32);
+
+    net.setInput(inputBlob);
+    Mat out = net.forward();
+
+    Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
+    normAssert(out, outRef);
+}
+
 }
 #endif
-- 
2.7.4
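Note (not part of the patch): with these layers in place, the OpenFace model
can be run end to end the same way the new OpenFace_accuracy test does. A
minimal usage sketch; the model file is the pretrained OpenFace network
referenced by the test, and the image path is a placeholder:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <cstdio>

    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        Net net = readNetFromTorch("openface_nn4.small2.v1.t7");

        Mat img = imread("face.png");                // placeholder input image
        Mat imgF32;
        img.convertTo(imgF32, CV_32FC3, 1.0 / 255);  // scale to [0, 1]
        resize(imgF32, imgF32, Size(96, 96));        // OpenFace input size

        net.setInput(blobFromImage(imgF32));
        Mat embedding = net.forward();  // 128-D face descriptor, unit L2 norm
        std::printf("embedding: %dx%d\n", embedding.rows, embedding.cols);
        return 0;
    }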