From 32e7ef8a3d2520e42a0ae1118872caf637ad945a Mon Sep 17 00:00:00 2001
From: Sergey Slashchinin
Date: Tue, 17 Nov 2020 13:31:04 +0300
Subject: [PATCH] Add fixes and tests for different layers

---
 modules/dnn/include/opencv2/dnn/dnn.hpp  |  3 ++
 modules/dnn/src/dnn.cpp                  | 55 ++++++++++++++++++++++-
 modules/dnn/src/layers/pooling_layer.cpp | 49 ++++++++++++++------
 modules/dnn/src/layers/reshape_layer.cpp | 51 ++++++++++++++++++++-
 modules/dnn/src/layers/slice_layer.cpp   | 13 +++++-
 modules/dnn/src/onnx/onnx_importer.cpp   | 76 +++++++++++++++++++++++++++-----
 modules/dnn/test/test_onnx_importer.cpp  | 21 +++++++++
 7 files changed, 240 insertions(+), 28 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 98a8259..9cb7089 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -354,9 +354,12 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
                                      const int requiredOutputs,
                                      std::vector<MatShape> &outputs,
                                      std::vector<MatShape> &internals) const;
+
         virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                                const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;}

+        virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs);
+
     CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
     CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
     CV_PROP int preferableTarget; //!< prefer target for layer forwarding
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index c789638..efafd5d 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1119,6 +1119,7 @@ struct Net::Impl : public detail::NetImplBase
         preferableBackend = DNN_BACKEND_DEFAULT;
         preferableTarget = DNN_TARGET_CPU;
         skipInfEngineInit = false;
+        hasDynamicShapes = false;
     }

     Ptr<DataLayer> netInputLayer;
@@ -1130,6 +1131,7 @@ struct Net::Impl : public detail::NetImplBase
     int preferableTarget;
     String halideConfigFile;
     bool skipInfEngineInit;
+    bool hasDynamicShapes;
     // Map host data to backend specific wrapper.
     std::map<void*, Ptr<BackendWrapper> > backendWrappers;
@@ -3074,6 +3076,46 @@ struct Net::Impl : public detail::NetImplBase
         shapes = inOutShapes[layerId];
     }

+    void updateLayersShapes()
+    {
+        CV_Assert(!layers[0].outputBlobs.empty());
+        ShapesVec inputShapes;
+        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
+        {
+            Mat& inp = layers[0].outputBlobs[i];
+            CV_Assert(inp.total());
+            if (preferableBackend == DNN_BACKEND_OPENCV &&
+                preferableTarget == DNN_TARGET_OPENCL_FP16)
+            {
+                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
+            }
+            inputShapes.push_back(shape(inp));
+        }
+        LayersShapesMap layersShapes;
+        layersShapes[0].in = inputShapes;
+        for (MapIdToLayerData::iterator it = layers.begin();
+             it != layers.end(); it++)
+        {
+            int layerId = it->first;
+            std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
+            if (layersShapes[layerId].in.empty())
+            {
+                for(int i = 0; i < inputLayerIds.size(); i++)
+                {
+                    int inputLayerId = inputLayerIds[i].lid;
+                    LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
+                    if(inputIt == layersShapes.end() || inputIt->second.out.empty())
+                    {
+                        getLayerShapesRecursively(inputLayerId, layersShapes);
+                    }
+                    const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
+                    layersShapes[layerId].in.push_back(shape);
+                }
+                it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in);
+            }
+        }
+    }
+
     LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
     {
         return *std::max_element(pins.begin(), pins.end());
@@ -3487,6 +3529,8 @@ int Net::addLayer(const String &name, const String &type, LayerParams &params)
     int id = ++impl->lastLayerId;
     impl->layerNameToId.insert(std::make_pair(name, id));
     impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
+    if (params.get("has_dynamic_shapes", false))
+        impl->hasDynamicShapes = true;
     return id;
 }
@@ -3818,8 +3862,13 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
     bool oldShape = prevShape == blobShape;

     blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
-    if (!oldShape)
+    if (!oldShape) {
         ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
+        if (impl->hasDynamicShapes)
+        {
+            impl->updateLayersShapes();
+        }
+    }

     if (!ld.outputBlobsWrappers[pin.oid].empty())
     {
@@ -4746,6 +4795,10 @@ bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
     return false;
 }

+bool Layer::updateMemoryShapes(const std::vector<MatShape> &inputs)
+{
+    return true;
+}
 //////////////////////////////////////////////////////////////////////////

 static Mutex& getLayerFactoryMutex()
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index fd08fdb..9841762 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -88,6 +88,9 @@ public:
         stride = Size(1, 1);
         pad_t = pad_l = pad_b = pad_r = 0;

+        hasDynamicShapes = params.get("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
+
         if (params.has("pool") || params.has("kernel_size") ||
             params.has("kernel_w") || params.has("kernel_h"))
         {
@@ -1043,26 +1046,34 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
             outShape.push_back(pooledSize.height);
             outShape.push_back(pooledSize.width);
         }
-        else if (padMode.empty())
+        else
         {
-            for (int i = 0; i < local_kernel.size(); i++) {
-                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
-                outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
+            if (hasDynamicShapes && !shapesInitialized)
+            {
+                //Just copy input shapes for width and height to prevent errors on loading stage
+                for (int i = 0; i < inpShape.size(); i++)
+                    outShape.push_back(inpShape[i]);
             }
+            else if (padMode.empty())
+            {
+                for (int i = 0; i < local_kernel.size(); i++) {
+                    float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
+                    outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
+                }

-            // If we have padding, ensure that the last pooling starts strictly
-            // inside the image (instead of at the padding); otherwise clip the last.
-            for (int i = 0; i < pads_end.size(); i++) {
-                if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
-                    --outShape[2 + i];
-                    CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
+                // If we have padding, ensure that the last pooling starts strictly
+                // inside the image (instead of at the padding); otherwise clip the last.
+                for (int i = 0; i < pads_end.size(); i++) {
+                    if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
+                        --outShape[2 + i];
+                        CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
+                    }
                 }
+            } else {
+                getConvPoolOutParams(inpShape, local_kernel, strides, padMode,
+                                     std::vector<size_t>(local_kernel.size(), 1), outShape);
             }
         }
-        else
-        {
-            getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector<size_t>(local_kernel.size(), 1), outShape);
-        }
         if (type == ROI)
         {
             CV_Assert(inputs.size() == 2);
@@ -1083,6 +1094,14 @@
         return false;
     }

+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {
+        int dims = inputs[0].size();
+        CV_Assert(inputs[0][dims - 1] > 0 && inputs[0][dims - 2] > 0);
+        shapesInitialized = true;
+        return true;
+    }
+
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
     {
@@ -1114,6 +1133,8 @@ private:
         ROI,   // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
         PSROI  // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
     };
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };

 Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp
index a85a4e4..642e7c5 100644
--- a/modules/dnn/src/layers/reshape_layer.cpp
+++ b/modules/dnn/src/layers/reshape_layer.cpp
@@ -164,6 +164,9 @@ public:
         setParamsFrom(params);
         int axis = params.get("axis", 0);
         int numAxes = params.get("num_axes", -1);
+        hasDynamicShapes = params.get("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
+
         CV_Assert(numAxes >= -1);
         newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
@@ -176,6 +179,25 @@ public:
             for (i = 0; i < dims; i++)
                 newShapeDesc[i] = paramShape.get<int>(i);
         }
+        if (hasDynamicShapes)
+        {
+            dynamicShapes.clear();
+            inputIndices.clear();
+            if (params.has("dynamic_axes")) {
+                CV_Assert(params.has("input_indices"));
+                const DictValue &dynamicAxes = params.get("dynamic_axes");
+                const DictValue &dynamicInputShapes = params.get("input_indices");
+                int i, dims = dynamicAxes.size();
+                CV_Assert(dims == dynamicInputShapes.size());
+                CV_Assert(dims > 0);
+                dynamicShapes.resize(dims);
+                inputIndices.resize(dims);
+                for (i = 0; i < dims; i++) {
+                    dynamicShapes[i] = dynamicAxes.get<int>(i);
+                    inputIndices[i] = dynamicInputShapes.get<int>(i);
+                }
+            }
+        }
     }

     virtual bool supportBackend(int backendId) CV_OVERRIDE
@@ -189,13 +211,21 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
+
         if (inputs.size() == 1 || inputs.size() == requiredOutputs)
         {
             outputs.clear();
             for (size_t i = 0; i < inputs.size(); i++)
             {
-                outputs.push_back(MatShape());
-                computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
+                if (hasDynamicShapes && !shapesInitialized)
+                {
+                    outputs.push_back(newShapeDesc);
+                }
+                else
+                {
+                    outputs.push_back(MatShape());
+                    computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
+                }
             }
         }
         else
@@ -206,6 +236,19 @@ public:
         return true;
     }

+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {
+        if (hasDynamicShapes)
+        {
+            for (int i = 0; i < dynamicShapes.size(); ++i)
+            {
+                newShapeDesc[dynamicShapes[i]] = inputs[0][inputIndices[i]];
+            }
+        }
+        shapesInitialized = true;
+        return true;
+    }
+
     void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
     {
         std::vector<Mat> outputs;
@@ -287,6 +330,10 @@ public:

 private:
     std::vector<MatShape> outShapes;
+    std::vector<int> dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input
+    std::vector<int> inputIndices; // Which axes from input are needed to compute correct output shape
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };

 Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp
index 9994677..fd314b7 100644
--- a/modules/dnn/src/layers/slice_layer.cpp
+++ b/modules/dnn/src/layers/slice_layer.cpp
@@ -66,6 +66,8 @@ public:
         setParamsFrom(params);
         axis = params.get("axis", 1);
         num_split = params.get("num_split", 0);
+        hasDynamicShapes = params.get("has_dynamic_shapes", false);
+        shapesInitialized = !hasDynamicShapes;
         if (params.has("slice_point"))
         {
             CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
@@ -143,7 +145,8 @@ public:
             CV_Assert(sliceRanges[i].size() <= inpShape.size());
             for (int j = 0; j < sliceRanges[i].size(); ++j)
             {
-                outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
+                if (shapesInitialized || inpShape[j] > 0)
+                    outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
             }
         }
     }
@@ -158,6 +161,12 @@ public:
         return false;
     }

+    bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
+    {
+        shapesInitialized = true;
+        return true;
+    }
+
     void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
     {
 #ifdef HAVE_OPENCL
@@ -564,6 +573,8 @@ public:
 protected:
     // The actual non-negative values determined from @p sliceRanges depend on input size.
     std::vector<std::vector<Range> > finalSliceRanges;
+    bool hasDynamicShapes;
+    bool shapesInitialized;
 };

 class CropLayerImpl CV_FINAL : public SliceLayerImpl
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 9443336..756c8a5 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -64,6 +64,7 @@ public:
     ONNXImporter(Net& net, const char *onnxFile)
         : dstNet(net)
     {
+        hasDynamicShapes = false;
         CV_Assert(onnxFile);
         CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
@@ -84,6 +85,7 @@ public:
     ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
         : dstNet(net)
     {
+        hasDynamicShapes = false;
         CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");

         struct _Buf : public std::streambuf
@@ -115,6 +117,7 @@ protected:
     std::map<std::string, Mat> constBlobs;

     std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
+    bool hasDynamicShapes;  // Whether the model has inputs with dynamic shapes
     typedef std::map<std::string, MatShape>::iterator IterShape_t;

     std::map<std::string, LayerInfo> layer_id;
@@ -413,8 +416,10 @@ void ONNXImporter::populateNet()
             for (int j = 0; j < inpShape.size(); ++j)
             {
                 inpShape[j] = tensorShape.dim(j).dim_value();
+                if (!tensorShape.dim(j).dim_param().empty())
+                    hasDynamicShapes = true;
             }
-            if (!inpShape.empty())
+            if (!inpShape.empty() && !hasDynamicShapes)
             {
                 inpShape[0] = std::max(inpShape[0], 1);  // It's OK to have undetermined batch size
             }
@@ -461,6 +466,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
     layerParams.name = name;
     layerParams.type = layer_type;
+    layerParams.set("has_dynamic_shapes", hasDynamicShapes);

     if (layer_type == "MaxPool")
     {
@@ -1276,6 +1282,20 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
         {
             layerParams.type = "Reshape";
             layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
+            if (hasDynamicShapes)
+            {
+                std::vector<int> dynamicAxes;
+                std::vector<int> inputIndices;
+                for (int index = 0; index < inpShape.size(); ++index)
+                {
+                    if (!maskedAxes[index])
+                        inputIndices.push_back(index);
+                }
+                for (int index = 0; index < outShape.size(); ++index)
+                    dynamicAxes.push_back(index);
+                layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+            }
         }
         else
             layerParams.type = "Identity";
@@ -1338,6 +1358,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             outShape.insert(outShape.begin() + axis, 1);
         layerParams.type = "Reshape";
         layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
+        if (hasDynamicShapes)
+        {
+            std::vector<int> dynamicAxes;
+            std::vector<int> inputIndices;
+            for (int index = 0; index < outShape.size(); ++index) {
+                if (index != axis)
+                    dynamicAxes.push_back(index);
+            }
+            for (int index = 0; index < inpShape.size(); ++index)
+                inputIndices.push_back(index);
+            layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+            layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+        }
     }
     else if (layer_type == "Expand")
     {
@@ -1625,6 +1658,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
             sliceLp.set("begin", paramBegin);
             sliceLp.set("end", paramEnd);
+            sliceLp.set("has_dynamic_shapes", hasDynamicShapes);

             if (inpShape.size() > 1)
             {
@@ -1637,6 +1671,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             layerParams.type = "Reshape";
             layerParams.set("axis", 0);
             layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
+            if (hasDynamicShapes)
+            {
+                std::vector<int> dynamicAxes;
+                std::vector<int> inputIndices;
+                for (int index = 0; index < inpShape.size(); ++index)
+                    dynamicAxes.push_back(index);
+                for (int index = 0; index < inpShape.size(); ++index)
+                    inputIndices.push_back(index);
+                layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
+                layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
+            }
             node_proto.set_input(0, sliceLp.name);
         }
         else
@@ -1676,7 +1721,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
         for (int i = 1; i < node_proto.input_size(); i++)
             CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());

-        String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+        String interp_mode;
+        if (layerParams.has("coordinate_transformation_mode"))
+            interp_mode = layerParams.get<String>("coordinate_transformation_mode");
+        else
+            interp_mode = layerParams.get<String>("mode");
         CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");

         layerParams.set("align_corners", interp_mode == "align_corners");
@@ -1688,16 +1737,23 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             shapes.convertTo(shapes, CV_32S);
             int height = shapes.at<int>(2);
             int width  = shapes.at<int>(3);
-            if (node_proto.input_size() == 3)
+            if (hasDynamicShapes)
             {
-                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
-                CV_Assert(shapeIt != outShapes.end());
-                MatShape scales = shapeIt->second;
-                height *= scales[2];
-                width *= scales[3];
+                layerParams.set("zoom_factor_x", width);
+                layerParams.set("zoom_factor_y", height);
+            }
+            else
+            {
+                if (node_proto.input_size() == 3) {
+                    IterShape_t shapeIt = outShapes.find(node_proto.input(0));
+                    CV_Assert(shapeIt != outShapes.end());
+                    MatShape scales = shapeIt->second;
+                    height *= scales[2];
+                    width *= scales[3];
+                }
+                layerParams.set("width", width);
+                layerParams.set("height", height);
             }
-            layerParams.set("width", width);
-            layerParams.set("height", height);
             if (layerParams.get<String>("mode") == "linear")
             {
                 layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 14d2d28..9ddc17c 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -710,6 +710,27 @@ TEST_P(Test_ONNX_layers, GatherMultiOutput)
     testONNXModels("gather_multi_output");
 }

+TEST_P(Test_ONNX_layers, DynamicAxes)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+    testONNXModels("squeeze_and_conv_dynamic_axes");
+    testONNXModels("unsqueeze_and_conv_dynamic_axes");
+    testONNXModels("gather_dynamic_axes");
+    testONNXModels("gather_scalar_dynamic_axes");
+    testONNXModels("slice_dynamic_axes");
+    testONNXModels("slice_opset_11_dynamic_axes");
+    testONNXModels("resize_opset11_torch1.6_dynamic_axes");
+    testONNXModels("average_pooling_dynamic_axes");
+    testONNXModels("maxpooling_sigmoid_dynamic_axes");
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

 class Test_ONNX_nets : public Test_ONNX_layers
-- 
2.7.4
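
Usage note (illustrative sketch, not part of the patch): the series makes a Net re-propagate shapes when a dynamic-shape model receives an input whose size differs from the previous call. A minimal sketch, assuming "model.onnx" is a placeholder for any graph exported with dynamic spatial axes (e.g. torch.onnx.export(..., dynamic_axes={'input': {2: 'height', 3: 'width'}})):

#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");

    // First forward pass: the importer has tagged every layer with
    // has_dynamic_shapes, so output shapes are only placeholders until a
    // real input arrives and setInput() calls Impl::updateLayersShapes().
    cv::Mat img1 = cv::Mat::zeros(224, 224, CV_32FC3);
    net.setInput(cv::dnn::blobFromImage(img1));
    cv::Mat out1 = net.forward();

    // Second forward pass with a different spatial size: the shape change
    // is detected in setInput() and re-propagated through
    // Layer::updateMemoryShapes() instead of failing at inference time.
    cv::Mat img2 = cv::Mat::zeros(320, 240, CV_32FC3);
    net.setInput(cv::dnn::blobFromImage(img2));
    cv::Mat out2 = net.forward();
    return 0;
}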
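
Layer-side contract (hedged sketch; "MyLayer" is hypothetical and not from the patch): a layer participating in this scheme reads has_dynamic_shapes in its constructor, returns placeholder shapes from getMemoryShapes() while real shapes are unknown, and flips a flag once updateMemoryShapes() is called, mirroring what the pooling/reshape/slice changes above do:

#include <opencv2/dnn.hpp>

class MyLayer CV_FINAL : public cv::dnn::Layer
{
public:
    MyLayer(const cv::dnn::LayerParams& params)
    {
        setParamsFrom(params);
        // Set by the ONNX importer for every layer of a dynamic model.
        hasDynamicShapes = params.get("has_dynamic_shapes", false);
        shapesInitialized = !hasDynamicShapes;
    }

    bool getMemoryShapes(const std::vector<cv::dnn::MatShape>& inputs, const int,
                         std::vector<cv::dnn::MatShape>& outputs,
                         std::vector<cv::dnn::MatShape>&) const CV_OVERRIDE
    {
        // While !shapesInitialized, echo the input shape so that network
        // construction does not fail on still-unknown dimensions.
        outputs.assign(1, inputs[0]);
        return false;
    }

    bool updateMemoryShapes(const std::vector<cv::dnn::MatShape>& inputs) CV_OVERRIDE
    {
        // Invoked from Net::Impl::updateLayersShapes() once concrete input
        // shapes are known; exact output shapes can be computed afterwards.
        shapesInitialized = true;
        return true;
    }

    void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays,
                 cv::OutputArrayOfArrays) CV_OVERRIDE {}  // computation omitted

private:
    bool hasDynamicShapes;
    bool shapesInitialized;
};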