1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2018, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
8 #include "../precomp.hpp"
9 #include <opencv2/dnn/shape_utils.hpp>
11 #include <opencv2/dnn/layer_reg.private.hpp>
13 #include <opencv2/core/utils/logger.defines.hpp>
14 #undef CV_LOG_STRIP_LEVEL
15 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
16 #include <opencv2/core/utils/logger.hpp>
18 #include <opencv2/core/utils/configuration.private.hpp>
29 #if defined _MSC_VER && _MSC_VER < 1910/*MSVS 2017*/
31 #pragma warning(disable: 4503) // decorated name length exceeded, name was truncated
34 #if defined(__GNUC__) && __GNUC__ >= 5
35 #pragma GCC diagnostic push
36 #pragma GCC diagnostic ignored "-Wsuggest-override"
38 #include "opencv-onnx.pb.h"
39 #if defined(__GNUC__) && __GNUC__ >= 5
40 #pragma GCC diagnostic pop
43 #include "onnx_graph_simplifier.hpp"
47 CV__DNN_INLINE_NS_BEGIN
49 extern bool DNN_DIAGNOSTICS_RUN;
51 class ONNXLayerHandler;
55 opencv_onnx::ModelProto model_proto;
59 LayerInfo(int _layerId = 0, int _outputId = 0) : layerId(_layerId), outputId(_outputId) {}
62 std::map<std::string, Mat> getGraphTensors(
63 const opencv_onnx::GraphProto& graph_proto);
64 Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
65 Mat getBlob(const std::string& input_name);
67 LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);
69 void addConstant(const std::string& name, const Mat& blob);
70 void addLayer(LayerParams& layerParams,
71 const opencv_onnx::NodeProto& node_proto);
72 void handleQuantizedNode(LayerParams& layerParams,
73 const opencv_onnx::NodeProto& node_proto);
75 void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
76 const std::string& input, size_t n);
77 void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id);
79 ONNXImporter(Net& net, const char *onnxFile);
80 ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer);
85 std::unique_ptr<ONNXLayerHandler> layerHandler;
88 opencv_onnx::GraphProto graph_proto;
89 std::string framework_name;
91 std::map<std::string, Mat> constBlobs;
93 std::map<std::string, MatShape> outShapes; // List of internal blobs shapes.
94 bool hasDynamicShapes; // Whether the model has inputs with dynamic shapes
95 typedef std::map<std::string, MatShape>::iterator IterShape_t;
97 std::map<std::string, LayerInfo> layer_id;
98 typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;
100 void handleNode(const opencv_onnx::NodeProto& node_proto);
103 friend class ONNXLayerHandler;
104 typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
105 typedef std::map<std::string, ONNXImporterNodeParser> DispatchMap;
106 typedef std::map<std::string, DispatchMap> DomainDispatchMap;
108 DomainDispatchMap domain_dispatch_map;
109 std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto);
110 const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto);
111 void buildDispatchMap_ONNX_AI(int opset_version);
112 void buildDispatchMap_COM_MICROSOFT(int opset_version);
114 // Domain: 'ai.onnx' (default)
115 // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
116 void parseArg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
117 void parseMaxUnpool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
118 void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
119 void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
120 void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
121 void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
122 void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
123 void parseBias (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
124 void parsePow (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
125 void parseMinMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
126 void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
127 void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
128 void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
129 void parseGRU (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
130 void parseImageScaler (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
131 void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
132 void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
133 void parseRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
134 void parseElu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
135 void parseTanh (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
136 void parseAbs (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
137 void parseCompare (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
138 void parsePRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
139 void parseLRN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
140 void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
141 void parseBatchNormalization (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
142 void parseGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
143 void parseMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
144 void parseMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
145 void parseConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
146 void parseConvTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
147 void parseTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
148 void parseSqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
149 void parseFlatten (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
150 void parseUnsqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
151 void parseExpand (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
152 void parseReshape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
153 void parsePad (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
154 void parseShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
155 void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
156 void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
157 void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
158 void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
159 void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
160 void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
161 void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
162 void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
163 void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
164 void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
166 // Domain: com.microsoft
167 // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
168 void parseQuantDequant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
169 void parseQConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
170 void parseQMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
171 void parseQEltwise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
172 void parseQLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
173 void parseQSigmoid (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
174 void parseQAvgPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
175 void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
177 // '???' domain or '???' layer type
178 void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
180 int onnx_opset; // OperatorSetIdProto for 'onnx' domain
181 std::map<std::string, int> onnx_opset_map; // map from OperatorSetIdProto
182 void parseOperatorSet();
184 const std::string str_domain_ai_onnx = "ai.onnx";
187 class ONNXLayerHandler : public detail::LayerHandler
190 explicit ONNXLayerHandler(ONNXImporter* importer_);
192 void fillRegistry(const opencv_onnx::GraphProto& net);
195 ONNXImporter* importer;
198 ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){}
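// Diagnostic pre-pass over the whole graph: for every node whose (domain, op_type)
// pair has no entry in the importer's dispatch map, record the node output name as
// "missing" so a diagnostic run can report all unsupported operators at once.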
200 void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
202 int layersSize = net.node_size();
203 for (int li = 0; li < layersSize; li++) {
204 const opencv_onnx::NodeProto &node_proto = net.node(li);
205 const std::string& name = node_proto.output(0);
206 const std::string& type = node_proto.op_type();
207 const std::string& layer_type_domain = importer->getLayerTypeDomain(node_proto);
208 const auto& dispatch = importer->getDispatchMap(node_proto);
209 if (dispatch.find(type) == dispatch.end())
211 addMissing(name, cv::format("%s.%s", layer_type_domain.c_str(), type.c_str()));
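// The two constructors below build the importer either from an ONNX file on disk or
// from an in-memory byte buffer; both parse a ModelProto and populate the target Net.
// They are normally reached through the public dnn API rather than used directly;
// a minimal usage sketch (assuming a valid model is available):
//
//   cv::dnn::Net net1 = cv::dnn::readNetFromONNX("model.onnx");   // file-based path
//   std::vector<uchar> bytes = ...;                               // model already in memory
//   cv::dnn::Net net2 = cv::dnn::readNetFromONNX(bytes);          // buffer-based path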
217 ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
218 : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
222 hasDynamicShapes = false;
224 CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
226 std::fstream input(onnxFile, std::ios::in | std::ios::binary);
229 CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
232 if (!model_proto.ParseFromIstream(&input))
234 CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
240 ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
241 : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
245 hasDynamicShapes = false;
246 CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
248 struct _Buf : public std::streambuf
250 _Buf(const char* buffer, size_t sizeBuffer)
252 char* p = const_cast<char*>(buffer);
253 setg(p, p, p + sizeBuffer);
257 _Buf buf(buffer, sizeBuffer);
258 std::istream input(&buf);
260 if (!model_proto.ParseFromIstream(&input))
261 CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");
266 inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
268 if (layerParams.has(oldKey)) {
269 layerParams.set(newKey, layerParams.get(oldKey));
270 layerParams.erase(oldKey);
275 void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
277 CV_Assert(valueInfoProto.has_name());
278 CV_Assert(valueInfoProto.has_type());
279 const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
280 CV_Assert(typeProto.has_tensor_type());
281 const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
282 CV_Assert(tensor.has_shape());
283 const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
285 int dim_size = tensorShape.dim_size();
286 CV_CheckGE(dim_size, 0, "");
287 MatShape shape(dim_size);
288 for (int j = 0; j < dim_size; ++j)
290 const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
291 if (dimension.has_dim_param())
293 CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
295 // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
296 if (dimension.has_denotation())
298 CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
300 shape[j] = dimension.dim_value();
302 CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
306 void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
308 if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
310 int dim_size = tensorProto.dims_size();
311 CV_CheckGE(dim_size, 0, "");
312 MatShape shape(dim_size);
313 for (int j = 0; j < dim_size; ++j)
315 int sz = static_cast<int>(tensorProto.dims(j));
318 CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
321 void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
323 if (!tensor_proto.raw_data().empty()) {
324 delete tensor_proto.release_raw_data();
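// Run a single layer eagerly on fully-known inputs. The importer uses this to fold
// operations on constant blobs (e.g. Slice over an initializer) into new constants
// at import time instead of emitting runtime layers.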
328 void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
329 std::vector<Mat>& outputs)
331 Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
332 CV_Assert((bool)layer);
334 std::vector<MatShape> inpShapes(inputs.size());
335 int ddepth = params.get<int>("depth", CV_32F);
336 for (size_t i = 0; i < inputs.size(); ++i)
338 inpShapes[i] = shape(inputs[i]);
339 if (i > 0 && ddepth != inputs[i].depth())
340 CV_Error(Error::StsNotImplemented, "Mixed input data types.");
341 ddepth = inputs[i].depth();
344 std::vector<MatShape> outShapes, internalShapes;
345 layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);
347 std::vector<Mat> internals(internalShapes.size());
348 outputs.resize(outShapes.size());
349 for (size_t i = 0; i < outShapes.size(); ++i)
350 outputs[i].create(outShapes[i], ddepth);
351 for (size_t i = 0; i < internalShapes.size(); ++i)
352 internals[i].create(internalShapes[i], ddepth);
354 layer->finalize(inputs, outputs);
355 layer->forward(inputs, outputs, internals);
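// Convert every graph initializer into a cv::Mat keyed by its tensor name. The raw
// protobuf payload is released right after conversion to keep peak memory usage low.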
358 std::map<std::string, Mat> ONNXImporter::getGraphTensors(
359 const opencv_onnx::GraphProto& graph_proto)
361 std::map<std::string, Mat> layers_weights;
363 for (int i = 0; i < graph_proto.initializer_size(); i++)
365 const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
366 dumpTensorProto(i, tensor_proto, "initializer");
367 Mat mat = getMatFromTensor(tensor_proto);
368 releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto)); // drop already loaded data
370 if (DNN_DIAGNOSTICS_RUN && mat.empty())
373 layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
375 return layers_weights;
378 static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
379 std::vector<int32_t> dst(src.size());
380 convertInt64ToInt32(src, dst, src.size());
381 return DictValue::arrayInt(&dst[0], src.size());
384 static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) {
385 return DictValue::arrayString(src.begin(), static_cast<int>(src.size()));
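// Translate the attributes of an ONNX node into OpenCV LayerParams. A few attribute
// names are remapped to dnn layer conventions (an illustrative subset, taken from the
// branches below):
//   kernel_shape -> kernel_size, strides -> stride, pads -> pad / paddings,
//   dilations -> dilation, auto_pad -> pad_mode.
// Scalar ints, floats, strings and repeated fields are copied as-is; nested graphs,
// tensor lists and string lists are rejected with StsNotImplemented.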
388 LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
391 for(int i = 0; i < node_proto.attribute_size(); i++)
393 opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
394 std::string attribute_name = attribute_proto.name();
398 if(attribute_name == "kernel_shape")
400 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
401 lp.set("kernel_size", parse(attribute_proto.ints()));
403 else if(attribute_name == "strides")
405 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
406 lp.set("stride", parse(attribute_proto.ints()));
408 else if(attribute_name == "pads")
410 if (node_proto.op_type() == "Pad")
413 // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
414 // We need to shuffle it to begin0, end0, begin1, end1, ...
415 CV_Assert(attribute_proto.ints_size() % 2 == 0);
416 const int dims = attribute_proto.ints_size() / 2;
417 std::vector<int32_t> paddings;
418 paddings.reserve(attribute_proto.ints_size());
419 for (int i = 0; i < dims; ++i)
421 paddings.push_back(attribute_proto.ints(i));
422 paddings.push_back(attribute_proto.ints(dims + i));
424 lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
428 // Convolution or pooling.
429 CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
430 lp.set("pad", parse(attribute_proto.ints()));
433 else if(attribute_name == "auto_pad")
435 if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
436 lp.set("pad_mode", "SAME");
438 else if (attribute_proto.s() == "VALID") {
439 lp.set("pad_mode", "VALID");
442 else if(attribute_name == "dilations")
444 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
445 lp.set("dilation", parse(attribute_proto.ints()));
447 else if(attribute_name == "activations" && node_proto.op_type() == "LSTM")
449 lp.set(attribute_name, parseStr(attribute_proto.strings()));
451 else if (attribute_proto.has_i())
453 ::google::protobuf::int64 src = attribute_proto.i();
454 if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
455 CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
457 lp.set(attribute_name, saturate_cast<int32_t>(src));
459 else if (attribute_proto.has_f())
461 lp.set(attribute_name, attribute_proto.f());
463 else if (attribute_proto.has_s())
465 lp.set(attribute_name, attribute_proto.s());
467 else if (attribute_proto.floats_size() > 0)
469 lp.set(attribute_name, DictValue::arrayReal(
470 attribute_proto.floats().data(), attribute_proto.floats_size()));
472 else if (attribute_proto.ints_size() > 0)
474 lp.set(attribute_name, parse(attribute_proto.ints()));
476 else if (attribute_proto.has_t())
478 opencv_onnx::TensorProto tensor = attribute_proto.t();
479 Mat blob = getMatFromTensor(tensor);
480 lp.blobs.push_back(blob);
482 else if (attribute_proto.has_g())
484 CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
486 else if (attribute_proto.graphs_size() > 0)
488 CV_Error(Error::StsNotImplemented,
489 cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
490 attribute_name.c_str(), attribute_proto.graphs_size())
493 else if (attribute_proto.strings_size() > 0)
495 std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
496 attribute_name.c_str(), attribute_proto.strings_size());
497 CV_LOG_ERROR(NULL, msg);
498 for (int i = 0; i < attribute_proto.strings_size(); i++)
500 CV_LOG_ERROR(NULL, " Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
502 CV_Error(Error::StsNotImplemented, msg);
504 else if (attribute_proto.tensors_size() > 0)
506 CV_Error(Error::StsNotImplemented,
507 cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
508 attribute_name.c_str(), attribute_proto.tensors_size())
513 CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
516 catch (const cv::Exception& e)
519 if (DNN_DIAGNOSTICS_RUN)
521 CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << attribute_name.c_str()
531 Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index)
533 CV_Assert(index < node_proto.input_size());
534 const std::string& input_name = node_proto.input(index);
535 return getBlob(input_name);
538 Mat ONNXImporter::getBlob(const std::string& input_name)
540 std::map<std::string, Mat>::const_iterator constBlob = constBlobs.find(input_name);
541 if (constBlob == constBlobs.end())
543 CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs");
545 return constBlob->second;
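// Register a parsed node as a layer of dstNet: create the layer, remember a
// (layerId, outputId) pair for every node output, connect all inputs that come from
// previously added layers, and pre-compute output shapes via getMemoryShapes() so
// that later nodes can look them up in outShapes.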
548 void ONNXImporter::addLayer(LayerParams& layerParams,
549 const opencv_onnx::NodeProto& node_proto)
551 int depth = layerParams.get<int>("depth", CV_32F);
552 int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
553 for (int i = 0; i < node_proto.output_size(); ++i)
555 layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
558 std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
560 for (int j = 0; j < node_proto.input_size(); j++)
562 const std::string& input_name = node_proto.input(j);
563 IterLayerId_t layerId = layer_id.find(input_name);
564 if (layerId != layer_id.end()) {
565 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
567 // Collect input shapes.
568 IterShape_t shapeIt = outShapes.find(input_name);
569 CV_Assert(shapeIt != outShapes.end());
570 layerInpShapes.push_back(shapeIt->second);
573 // Compute shape of output blob for this layer.
574 Ptr<Layer> layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage
575 layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
576 for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
578 outShapes[node_proto.output(i)] = layerOutShapes[i];
582 /** @brief Make N copies of input layer and set them as input to node_proto.
583 * @param prefix prefix of new layers' names
584 * @param node_proto node which will contain all copies as inputs
585 * @param input name of the node to copy
586 * @param n number of copies
588 void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
589 const std::string& input, size_t n)
591 std::vector<std::string> input_names;
592 input_names.reserve(n);
593 for (size_t j = 0; j < n; j++)
596 copyLP.name = format("%s/copy_%zu", prefix.c_str(), j);
597 copyLP.type = "Identity";
598 CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) &&
599 "Couldn't copy the node: generated name already exists in the graph.");
600 input_names.push_back(copyLP.name);
602 node_proto.set_input(0, input);
603 node_proto.set_output(0, copyLP.name);
604 addLayer(copyLP, node_proto);
606 node_proto.clear_input();
607 for (size_t i = 0; i < input_names.size(); i++)
609 node_proto.add_input(input_names[i]);
613 /** @brief Multiply one of node_proto inputs by -1
614 * @param layerParams parameters of the node
615 * @param node_proto node which input will be replaced
616 * @param input_id id of input to be multiplied by -1
618 void ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id)
620 LayerParams powerParams;
621 powerParams.name = layerParams.name + "/neg";
622 powerParams.type = "Power";
623 powerParams.set("scale", -1.f);
626 int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
628 IterLayerId_t layerId = layer_id.find(node_proto.input(input_id));
629 CV_Assert(layerId != layer_id.end());
630 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
632 layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
633 outShapes[powerParams.name] = outShapes[node_proto.input(input_id)];
635 // Replace the node input with the output of the Power (negation) layer
636 node_proto.set_input(input_id, powerParams.name);
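// Remember an import-time constant and its shape so later nodes can fetch it with
// getBlob() or fold computations on it without adding layers to the network.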
639 void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
641 CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob));
642 constBlobs.insert(std::make_pair(name, blob));
643 outShapes.insert(std::make_pair(name, shape(blob)));
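// Read the model's opset_import entries and build one dispatch map per declared
// operator domain: the default 'ai.onnx' domain plus known extensions such as
// 'com.microsoft'; unknown domains are reported and left to custom layer handling.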
646 void ONNXImporter::parseOperatorSet()
648 int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
652 int opset_size = model_proto.opset_import_size();
655 CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information")
659 for (int i = 0; i < opset_size; ++i)
661 const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
662 const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
663 int version = opset_entry.has_version() ? opset_entry.version() : -1;
664 if (domain.empty() || domain == str_domain_ai_onnx)
666 // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
667 onnx_opset = std::max(onnx_opset, version);
668 onnx_opset_map[str_domain_ai_onnx] = onnx_opset;
672 CV_LOG_DEBUG(NULL, "DNN/ONNX: using non-standard ONNX opset[" << i << "]: domain='" << domain << "' version=" << version);
673 onnx_opset_map[domain] = version;
677 CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
679 buildDispatchMap_ONNX_AI(onnx_opset);
680 for (const auto& pair : onnx_opset_map)
682 if (pair.first == str_domain_ai_onnx)
684 continue; // done above
686 else if (pair.first == "com.microsoft")
688 buildDispatchMap_COM_MICROSOFT(pair.second);
692 CV_LOG_INFO(NULL, "DNN/ONNX: unknown domain='" << pair.first << "' version=" << pair.second << ". No dispatch map, you may need to register 'custom' layers.");
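// For quantized nodes (output names ending with "quantized"), find the matching
// "...scale" / "...zero_point" constants (falling back to the quantized input's
// parameters), propagate them to every output, and switch the layer to 8-bit depth.
// Only per-tensor quantization parameters are supported here.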
697 void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
698 const opencv_onnx::NodeProto& node_proto)
700 // Quantized nodes have output names ending with 'quantized'
701 std::string outName = node_proto.output(0);
702 int len = outName.length();
706 if (outName.substr(len - 9) == "quantized")
708 outName = outName.substr(0, len - 9);
709 Mat scale, zeropoint;
711 if (constBlobs.find(outName + "scale") != constBlobs.end() &&
712 constBlobs.find(outName + "zero_point") != constBlobs.end())
714 scale = getBlob(outName + "scale");
715 zeropoint = getBlob(outName + "zero_point");
719 std::string inpName = node_proto.input(0);
720 inpName = inpName.substr(0, inpName.length() - 9);
721 scale = getBlob(inpName + "scale");
722 zeropoint = getBlob(inpName + "zero_point");
724 for (int i = 0; i < node_proto.output_size(); i++)
726 std::string out = node_proto.output(i);
727 out = out.substr(0, out.length() - 9);
728 addConstant(out + "scale", scale);
729 addConstant(out + "zero_point", zeropoint);
733 if (scale.total() != 1 || zeropoint.total() != 1)
734 CV_Error(Error::StsNotImplemented, "Per-channel scales/zeropoints are not supported");
736 layerParams.set("depth", CV_8S);
737 layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
738 layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
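// Main import entry point: log model metadata, simplify known subgraph patterns,
// load initializers as constants, register network inputs (tracking dynamic
// dimensions), then dispatch every graph node to its parser through handleNode().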
742 void ONNXImporter::populateNet()
744 CV_Assert(model_proto.has_graph());
745 graph_proto = model_proto.graph();
747 std::string framework_version;
748 if (model_proto.has_producer_name())
749 framework_name = model_proto.producer_name();
750 if (model_proto.has_producer_version())
751 framework_version = model_proto.producer_version();
753 CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
754 << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String())
755 << " model produced by '" << framework_name << "'"
756 << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
757 << ". Number of nodes = " << graph_proto.node_size()
758 << ", initializers = " << graph_proto.initializer_size()
759 << ", inputs = " << graph_proto.input_size()
760 << ", outputs = " << graph_proto.output_size()
765 simplifySubgraphs(graph_proto);
767 const int layersSize = graph_proto.node_size();
768 CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");
770 constBlobs = getGraphTensors(graph_proto); // scan GraphProto.initializer
771 std::vector<String> netInputs; // map with network inputs (without const blobs)
772 // Add all input shapes: this covers both constant blobs and the network's real inputs.
773 for (int i = 0; i < graph_proto.input_size(); ++i)
775 const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
776 CV_Assert(valueInfoProto.has_name());
777 const std::string& name = valueInfoProto.name();
778 CV_Assert(valueInfoProto.has_type());
779 const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
780 CV_Assert(typeProto.has_tensor_type());
781 const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
782 CV_Assert(tensor.has_shape());
783 const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
785 int dim_size = tensorShape.dim_size();
786 CV_CheckGE(dim_size, 0, ""); // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
787 MatShape inpShape(dim_size);
788 for (int j = 0; j < dim_size; ++j)
790 const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
791 if (dimension.has_dim_param())
793 CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
795 // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
796 if (dimension.has_denotation())
798 CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
800 inpShape[j] = dimension.dim_value();
801 // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
802 if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
804 hasDynamicShapes = true;
807 bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
808 CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
809 CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
810 if (dim_size > 0 && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs
812 inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
814 outShapes[valueInfoProto.name()] = inpShape;
815 // fill map: push layer name, layer id and output id
818 netInputs.push_back(name);
819 layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
823 dstNet.setInputsNames(netInputs);
826 for (int i = 0; i < graph_proto.output_size(); ++i)
828 dumpValueInfoProto(i, graph_proto.output(i), "output");
831 if (DNN_DIAGNOSTICS_RUN) {
832 CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
833 layerHandler->fillRegistry(graph_proto);
836 for(int li = 0; li < layersSize; li++)
838 const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
839 handleNode(node_proto);
842 CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!"));
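// Resolve the operator domain of a node; a missing or empty domain is treated as the
// default 'ai.onnx' domain.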
845 std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto)
847 if (!node_proto.has_domain())
848 return str_domain_ai_onnx;
849 const std::string& domain = node_proto.domain();
851 return str_domain_ai_onnx;
855 const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto)
857 static DispatchMap empty_map;
858 const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
859 auto it = domain_dispatch_map.find(layer_type_domain);
860 if (it == domain_dispatch_map.end())
868 void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
870 CV_Assert(node_proto.output_size() >= 1);
871 const std::string& name = node_proto.output(0);
872 const std::string& layer_type = node_proto.op_type();
873 const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
874 const auto& dispatch = getDispatchMap(node_proto);
876 CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and "
877 << node_proto.output_size() << " outputs: "
878 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
879 << cv::format(" from %sdomain='", onnx_opset_map.count(layer_type_domain) == 1 ? "" : "undeclared ")
880 << layer_type_domain << "'"
883 if (dispatch.empty())
885 CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'");
888 LayerParams layerParams;
891 // FIXIT not all cases can be repacked into "LayerParams". Importer should handle such cases directly for each "layer_type"
892 layerParams = getLayerParams(node_proto);
894 layerParams.name = name;
895 layerParams.type = layer_type;
896 layerParams.set("has_dynamic_shapes", hasDynamicShapes);
898 handleQuantizedNode(layerParams, node_proto);
900 DispatchMap::const_iterator iter = dispatch.find(layer_type);
901 if (iter != dispatch.end())
903 CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto);
907 parseCustomLayer(layerParams, node_proto);
910 catch (const cv::Exception& e)
912 if (DNN_DIAGNOSTICS_RUN)
914 CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
915 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
916 << " from domain='" << layer_type_domain << "'"
919 cv::AutoLock lock(getLayerFactoryMutex());
920 auto registeredLayers = getLayerFactoryImpl();
921 if (registeredLayers.find(layerParams.type) != registeredLayers.end())
925 Ptr<Layer> layer = LayerFactory::createLayerInstance(layerParams.type, layerParams);
927 catch (const std::exception& e)
929 CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". Error: " << e.what()
936 CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
937 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
938 << " from domain='" << layer_type_domain << "'"
941 for (int i = 0; i < node_proto.input_size(); i++)
943 CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'");
945 for (int i = 0; i < node_proto.output_size(); i++)
947 CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'");
949 if (DNN_DIAGNOSTICS_RUN)
951 for (int i = 0; i < node_proto.output_size(); ++i)
953 layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i)));
954 outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)];
958 CV_Error(Error::StsError, cv::format("Node [%s@%s]:(%s) parse error: %s", layer_type.c_str(), layer_type_domain.c_str(), name.c_str(), e.what()));
962 void ONNXImporter::parseArg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
964 const std::string& layer_type = node_proto.op_type();
965 layerParams.type = "Arg";
966 layerParams.set("op", layer_type == "ArgMax" ? "max" : "min");
967 addLayer(layerParams, node_proto);
970 void setCeilMode(LayerParams& layerParams)
972 // The deprecated auto_pad attribute (converted to pad_mode in getLayerParams) implies ceil rounding
973 if (layerParams.has("pad_mode"))
975 layerParams.set("ceil_mode", true);
977 else if (!layerParams.has("ceil_mode"))
979 layerParams.set("ceil_mode", false);
983 void ONNXImporter::parseMaxUnpool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
985 layerParams.type = "MaxUnpool";
987 DictValue kernel_shape = layerParams.get("kernel_size");
988 CV_Assert(kernel_shape.size() == 2);
989 layerParams.set("pool_k_w", kernel_shape.get<int>(0));
990 layerParams.set("pool_k_h", kernel_shape.get<int>(1));
992 int pool_pad_w = 0, pool_pad_h = 0;
993 if (layerParams.has("pad"))
995 DictValue pads = layerParams.get("pad");
996 CV_CheckEQ(pads.size(), 2, "");
997 pool_pad_w = pads.get<int>(0);
998 pool_pad_h = pads.get<int>(1);
1000 layerParams.set("pool_pad_w", pool_pad_w);
1001 layerParams.set("pool_pad_h", pool_pad_h);
1004 int pool_stride_w = 1, pool_stride_h = 1;
1005 if (layerParams.has("stride"))
1007 DictValue strides = layerParams.get("stride");
1008 CV_CheckEQ(strides.size(), 2, "");
1009 pool_stride_w = strides.get<int>(0);
1010 pool_stride_h = strides.get<int>(1);
1012 layerParams.set("pool_stride_w", pool_stride_w);
1013 layerParams.set("pool_stride_h", pool_stride_h);
1015 addLayer(layerParams, node_proto);
1018 void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1020 int depth = layerParams.get<int>("depth", CV_32F);
1021 layerParams.type = (depth == CV_8S) ? "PoolingInt8" : "Pooling";
1022 layerParams.set("pool", "MAX");
1023 setCeilMode(layerParams);
1024 addLayer(layerParams, node_proto);
1027 void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1029 layerParams.type = "Pooling";
1030 layerParams.set("pool", "AVE");
1031 setCeilMode(layerParams);
1032 layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
1033 addLayer(layerParams, node_proto);
1036 void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1038 opencv_onnx::NodeProto node_proto = node_proto_;
1039 const std::string& layer_type = node_proto.op_type();
1041 CV_Assert(node_proto.input_size() == 1);
1042 layerParams.type = "Pooling";
1044 if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax")
1046 else if (layer_type == "ReduceSum")
1050 layerParams.set("pool", pool);
1051 layerParams.set("global_pooling", !layerParams.has("axes"));
1052 bool keepdims = layerParams.get<int>("keepdims", 1) == 1;
1053 if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
1055 MatShape inpShape = outShapes[node_proto.input(0)];
1056 DictValue axes = layerParams.get("axes");
1057 MatShape targetShape;
1058 std::vector<bool> shouldDelete(inpShape.size(), false);
1059 for (int i = 0; i < axes.size(); i++) {
1060 int axis = normalize_axis(axes.get<int>(i), inpShape.size());
1061 shouldDelete[axis] = true;
1063 for (int axis = 0; axis < inpShape.size(); ++axis){
1064 if (!shouldDelete[axis])
1065 targetShape.push_back(inpShape[axis]);
1067 targetShape.push_back(1);
1070 if (inpShape.size() == 3 && axes.size() <= 2)
1072 int axis = normalize_axis(axes.get<int>(0), inpShape.size());
1073 CV_CheckNE(axis, 0, "");
1075 LayerParams reshapeLp;
1076 reshapeLp.name = layerParams.name + "/reshape";
1077 reshapeLp.type = "Reshape";
1078 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1079 reshapeLp.set("axis", 0);
1080 reshapeLp.set("num_axes", 1);
1081 int newShape[] = {1, -1};
1082 reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2));
1084 opencv_onnx::NodeProto proto;
1085 proto.add_input(node_proto.input(0));
1086 proto.add_output(reshapeLp.name);
1087 addLayer(reshapeLp, proto);
1090 avgLp.name = layerParams.name + "/avg";
1091 avgLp.type = "Pooling";
1092 CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
1093 avgLp.set("pool", pool);
1094 if (axes.size() == 2)
1096 CV_CheckEQ(normalize_axis(axes.get<int>(0), inpShape.size()), 1, "Unsupported mode");
1097 CV_CheckEQ(normalize_axis(axes.get<int>(1), inpShape.size()), 2, "Unsupported mode");
1098 avgLp.set("global_pooling", true);
1102 avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true);
1103 avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1);
1106 node_proto.set_input(0, reshapeLp.name);
1107 node_proto.set_output(0, avgLp.name);
1108 addLayer(avgLp, node_proto);
1112 if (inpShape.size() != 4 && inpShape.size() != 5)
1113 CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation.");
1115 CV_Assert(axes.size() <= inpShape.size() - 2);
1116 std::vector<int> kernel_size(inpShape.size() - 2, 1);
1117 if (axes.size() == 1 && (normalize_axis(axes.get<int>(0), inpShape.size()) <= 1))
1119 int axis = normalize_axis(axes.get<int>(0), inpShape.size());
1120 MatShape newShape = inpShape;
1121 newShape[axis + 1] = total(newShape, axis + 1);
1122 newShape.resize(axis + 2);
1123 newShape.insert(newShape.begin(), 2 - axis, 1);
1125 LayerParams reshapeLp;
1126 reshapeLp.type = "Reshape";
1127 reshapeLp.name = layerParams.name + "/reshape";
1128 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1129 reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size()));
1131 node_proto.set_output(0, reshapeLp.name);
1132 addLayer(reshapeLp, node_proto);
1134 kernel_size.resize(2);
1135 kernel_size[0] = inpShape[axis];
1136 node_proto.set_input(0, node_proto.output(0));
1140 for (int i = 0; i < axes.size(); i++) {
1141 int axis = normalize_axis(axes.get<int>(i), inpShape.size());
1142 CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
1143 kernel_size[axis - 2] = inpShape[axis];
1147 LayerParams poolLp = layerParams;
1148 poolLp.name = layerParams.name + "/avg";
1149 CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
1150 poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));
1152 node_proto.set_output(0, poolLp.name);
1153 addLayer(poolLp, node_proto);
1156 layerParams.type = "Reshape";
1157 layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));
1159 node_proto.set_input(0, node_proto.output(0));
1160 node_proto.set_output(0, layerParams.name);
1162 else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
1164 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
1165 CV_Assert(shapeIt != outShapes.end());
1166 const size_t dims = keepdims ? shapeIt->second.size() : 1;
1168 LayerParams reshapeLp;
1169 reshapeLp.name = layerParams.name + "/reshape";
1170 reshapeLp.type = "Reshape";
1171 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1172 int newShape[] = {1, 1, 1, -1};
1173 reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4));
1175 opencv_onnx::NodeProto proto;
1176 proto.add_input(node_proto.input(0));
1177 proto.add_output(reshapeLp.name);
1178 addLayer(reshapeLp, proto);
1180 LayerParams poolLp = layerParams;
1181 poolLp.name = layerParams.name + "/pool";
1182 CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
1184 node_proto.set_input(0, reshapeLp.name);
1185 node_proto.set_output(0, poolLp.name);
1186 addLayer(poolLp, node_proto);
1188 layerParams.type = "Reshape";
1189 std::vector<int> targetShape(dims, 1);
1190 layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size()));
1192 node_proto.set_input(0, node_proto.output(0));
1193 node_proto.set_output(0, layerParams.name);
1195 addLayer(layerParams, node_proto);
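// Slice comes in two forms: the attribute form (older opsets, with 'starts'/'ends'
// and optional 'axes' attributes) and the input form (opset 10+, with starts, ends,
// axes and steps supplied as constant inputs). Both are normalized into the
// begin/end/steps parameters of the dnn Slice layer, and slicing of a constant
// input is folded immediately via runLayer().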
1198 void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1201 std::vector<int> begin;
1202 std::vector<int> end;
1203 std::vector<int> steps;
1204 int inp_size = node_proto.input_size();
1208 if (layerParams.has("axes")) {
1209 DictValue axes = layerParams.get("axes");
1210 for (int i = 1; i < axes.size(); ++i) {
1211 CV_Assert(axes.get<int>(i - 1) == axes.get<int>(i) - 1);
1213 axis = axes.get<int>(0);
1216 DictValue starts = layerParams.get("starts");
1217 DictValue ends = layerParams.get("ends");
1218 CV_Assert(starts.size() == ends.size());
1221 CV_CheckLE(axis, 1024, "Slice layer can't have more than 1024 axes"); // arbitrary limit
1222 begin.resize(axis, 0);
1223 end.resize(axis, -1);
1225 for (int i = 0; i < starts.size(); ++i)
1227 begin.push_back(starts.get<int>(i));
1228 int finish = ends.get<int>(i);
1229 end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
1231 } else { // inp_size > 1
1232 CV_Assert(inp_size >= 3);
1233 for (int i = 1; i < inp_size; i++) {
1234 CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
1236 Mat start_blob = getBlob(node_proto, 1);
1237 Mat end_blob = getBlob(node_proto, 2);
1238 CV_Assert(start_blob.total() == end_blob.total());
1241 Mat axes_blob = getBlob(node_proto, 3);
1242 const int* axes = (int*)axes_blob.data;
1243 for (int i = 1; i < axes_blob.total(); ++i) {
1244 CV_Assert(axes[i - 1] == axes[i] - 1);
1249 const int* starts = start_blob.ptr<int>();
1250 const int* ends = end_blob.ptr<int>();
1252 begin.resize(axis, 0);
1253 end.resize(axis, -1);
1255 std::copy(starts, starts + start_blob.total(), std::back_inserter(begin));
1256 for (int i = 0; i < end_blob.total(); ++i)
1258 int finish = ends[i];
1259 end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
1262 if (inp_size == 5) {
1263 CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end());
1264 Mat step_blob = getBlob(node_proto, 4);
1265 const int* steps_ptr = step_blob.ptr<int>();
1268 steps.resize(axis, 1);
1270 std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps));
1272 // Unusual use of the Slice op: reversing a tensor along its first axis.
1273 // We only work around it for 2D constants.
1274 if (constBlobs.find(node_proto.input(0)) != constBlobs.end() &&
1276 start_blob.at<int>(0) == -1 && step_blob.at<int>(0) == -1 &&
1277 end_blob.at<int>(0) == std::numeric_limits<int32_t>::min())
1279 Mat inp = getBlob(node_proto, 0);
1283 flip(inp, flipped, 0);
1284 addConstant(layerParams.name, flipped);
1290 layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size()));
1291 layerParams.set("end", DictValue::arrayInt(&end[0], end.size()));
1292 layerParams.set("axis", axis);
1295 layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size()));
1297 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
1299 Mat inp = getBlob(node_proto, 0);
1300 std::vector<Mat> inputs, sliced;
1301 inputs.push_back(inp);
1302 runLayer(layerParams, inputs, sliced);
1303 CV_Assert(sliced.size() == 1);
1304 addConstant(layerParams.name, sliced[0]);
1307 addLayer(layerParams, node_proto);
1310 void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1312 if (layerParams.has("split"))
1314 DictValue splits = layerParams.get("split");
1315 const int numSplits = splits.size();
1316 CV_Assert(numSplits > 1);
1318 std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
1319 for (int i = 1; i < splits.size() - 1; ++i)
1321 slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i);
1323 layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
1327 layerParams.set("num_split", node_proto.output_size());
1329 int depth = layerParams.get<int>("depth", CV_32F);
1330 layerParams.type = (depth == CV_8S) ? "SliceInt8" : "Slice";
1331 layerParams.set("axis", layerParams.get<float>("axis", 0));
1332 addLayer(layerParams, node_proto);
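// Add / Sub / Sum are lowered depending on which inputs are constant: two constants
// are folded right away; a scalar constant becomes a Power layer (scale + shift); a
// constant matching the other input's shape becomes a Const layer feeding an Eltwise
// sum; other broadcastable constants become a Scale layer with bias; two non-constant
// inputs of equal shape map to an Eltwise layer.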
1335 void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1337 opencv_onnx::NodeProto node_proto = node_proto_;
1338 const std::string& layer_type = node_proto.op_type();
1339 bool isSub = layer_type == "Sub";
1341 if (layer_type == "Sum" && node_proto.input_size() == 1)
1343 layerParams.type = "Identity";
1344 addLayer(layerParams, node_proto);
1348 CV_Assert((node_proto.input_size() == 2) || (layer_type == "Sum" && node_proto.input_size() > 2));
1350 if (layer_type == "Sum" && node_proto.input_size() > 2)
1352 for (int i = 0; i < node_proto.input_size(); ++i)
1354 if (layer_id.find(node_proto.input(i)) == layer_id.end())
1356 CV_Error(Error::StsNotImplemented, "Sum of constants is not implemented for inputs > 2");
1361 bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
1362 bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
1363 if (is_const_0 && is_const_1)
1365 Mat blob_0 = getBlob(node_proto, 0);
1366 Mat blob_1 = getBlob(node_proto, 1);
1367 CV_Assert(blob_0.size == blob_1.size);
1368 Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
1369 addConstant(layerParams.name, output);
1372 else if (is_const_0 || is_const_1)
1374 int const_blob_id = is_const_0 ? 0 : 1;
1375 int input_id = 1 - const_blob_id;
1376 Mat blob = getBlob(node_proto, const_blob_id);
1377 int blob_total = blob.total();
1379 const float inputScale = isSub && is_const_0 ? -1.f : 1.f;
1380 const float constScale = isSub && is_const_1 ? -1.f : 1.f;
1382 if (blob_total == 1) {
1383 layerParams.type = "Power";
1384 layerParams.set("scale", inputScale);
1385 layerParams.set("shift", constScale * blob.ptr<float>()[0]);
1388 MatShape inpShape = outShapes[node_proto.input(input_id)];
1389 if (shape(blob) == inpShape)
1391 LayerParams constParams;
1392 constParams.name = layerParams.name + "/const";
1393 constParams.type = "Const";
1394 constParams.blobs.push_back(blob);
1395 int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
1396 layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
1397 outShapes[constParams.name] = shape(blob);
1399 layerParams.type = "Eltwise";
1400 float coeffs[] = {1., isSub ? -1.f : 1.f};
1401 layerParams.set("coeff", DictValue::arrayReal<float*>(coeffs, 2));
1402 node_proto.set_input(const_blob_id, constParams.name);
1406 if (inputScale < 0.f)
1408 addNegation(layerParams, node_proto, input_id);
1411 layerParams.type = "Scale";
1412 layerParams.set("bias_term", true);
1414 for (int i = 0; i < graph_proto.initializer_size(); i++)
1416 opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
1417 if (tensor_proto.name() == node_proto.input(const_blob_id))
1419 axis = inpShape.size() - tensor_proto.dims_size();
1423 layerParams.set("axis", axis);
1424 blob = blob.reshape(1, 1);
1425 layerParams.blobs.push_back(constScale * blob);
1429 else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
1431 layerParams.type = "Eltwise";
1434 static float subCoeffs[] = {1.f, -1.f};
1435 layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
1442 addNegation(layerParams, node_proto, 1);
1444 layerParams.type = "Scale";
1445 layerParams.set("bias_term", true);
1447 addLayer(layerParams, node_proto);
1450 void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1452 if (layer_id.find(node_proto.input(1)) != layer_id.end())
1453 CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");
1455 Mat blob = getBlob(node_proto, 1);
1456 if (blob.total() != 1)
1457 CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");
1459 blob.convertTo(blob, CV_32F);
1460 layerParams.type = "Power";
1461 layerParams.set("power", blob.ptr<float>()[0]);
1462 addLayer(layerParams, node_proto);
1466 void ONNXImporter::parseMinMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1468 const std::string& layer_type = node_proto.op_type();
1469 layerParams.type = "Eltwise";
1470 layerParams.set("operation", layer_type == "Max" ? "max" : "min");
1471 addLayer(layerParams, node_proto);
1474 void ONNXImporter::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1476 layerParams.type = "Power";
1477 layerParams.set("scale", -1);
1478 addLayer(layerParams, node_proto);
1481 void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1483 CV_Assert(node_proto.input_size() == 0);
1484 CV_Assert(layerParams.blobs.size() == 1);
1485 addConstant(layerParams.name, layerParams.blobs[0]);
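// ONNX LSTM is mapped onto the dnn LSTM layer: the per-gate blocks of Wx, Wh and the
// bias are reordered in-place (the second and third numHidden-sized chunks are
// swapped, see the IFGO->IGFO loop), the blobs are packed as [Wh, Wx, b, h0, c0],
// and a trailing Reshape restores the extra dimension that ONNX emits in the output.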
1488 void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1490 opencv_onnx::NodeProto node_proto = node_proto_;
1491 LayerParams lstmParams = layerParams;
1492 lstmParams.name += "/lstm";
1494 // https://pytorch.org/docs/stable/nn.html#lstm
1495 CV_Assert(node_proto.input_size() >= 7);
1496 Mat Wx = getBlob(node_proto, 1);
1497 Mat Wh = getBlob(node_proto, 2);
1498 Mat b = getBlob(node_proto, 3);
1500 const int numHidden = lstmParams.get<int>("hidden_size");
1501 const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM.
1502 const int numFeatures = Wx.size[2];
1504 // Following checks are deduced from the IFGO->IGFO loop below
1505 // Wx is numDirs X numHidden*3 X numFeatures
1506 // Wh is numDirs X numHidden*3 X numHidden
1507 CV_CheckLE(numHidden * 3, Wx.size[1], "Wx should have at least 3x hidden_size in dimension 1");
1508 CV_CheckLE(numHidden * 3, Wh.size[1], "Wh should have at least 3x hidden_size in dimension 1");
1509 CV_CheckLE(numHidden, Wh.size[2], "Wh should have at least hidden_size in dimension 2");
1512 if (!node_proto.input(5).empty()) {
1513 h0 = getBlob(node_proto, 5);
1514 h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
1516 // The initial_h input can be empty (e.g. with the keras2onnx producer); fill it with zeros
1517 h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1);
1519 if (!node_proto.input(6).empty()) {
1520 c0 = getBlob(node_proto, 6);
1521 c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
1523 // The initial_c input can be empty (e.g. with the keras2onnx producer); fill it with zeros
1524 c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1);
1527 b = b.reshape(1, b.size[0]);
1528 Mat bx = b.colRange(0, b.cols / 2);
1529 Mat bh = b.colRange(b.cols / 2, b.cols);
1532 // b is numDirs X numHidden*3
1533 CV_CheckLE(numHidden * 3, b.cols, "Bias data should have at least 3x hidden_size columns");
1536 for (int k = 0; k < numDirs; ++k)
1538 float* WxData = Wx.ptr<float>(k);
1539 float* WhData = Wh.ptr<float>(k);
1540 float* biasData = b.ptr<float>(k);
1541 for (int j = 0; j < numHidden; ++j)
1543 for (int i = 0; i < numFeatures; ++i)
1545 std::swap(WxData[(numHidden + j) * numFeatures + i],
1546 WxData[(numHidden * 2 + j) * numFeatures + i]);
1548 for (int i = 0; i < numHidden; ++i)
1550 std::swap(WhData[(numHidden + j) * numHidden + i],
1551 WhData[(numHidden * 2 + j) * numHidden + i]);
1553 std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
1556 Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
1557 Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
1560 lstmParams.blobs.resize(5);
1561 lstmParams.blobs[0] = Wh;
1562 lstmParams.blobs[1] = Wx;
1563 lstmParams.blobs[2] = b;
1564 lstmParams.blobs[3] = h0;
1565 lstmParams.blobs[4] = c0;
1567 // read direction attribute
1568 lstmParams.set("reverse", lstmParams.get<String>("direction", "") == "reverse");
1569 lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
1571 node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name
1572 addLayer(lstmParams, node_proto);
1574 MatShape lstmShape = outShapes[node_proto.output(0)];
1576 // Insert a dummy dimension of size 1 to mirror the extra axis in the ONNX LSTM output shape
1577 lstmShape.insert(lstmShape.begin() + 1, 1);
1579 layerParams.type = "Reshape";
1580 layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
1581 node_proto.set_input(0, lstmParams.name); // redirect input to LSTM
1582 node_proto.set_output(0, layerParams.name); // keep origin LSTM's name
1583 addLayer(layerParams, node_proto);
1586 void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1588 opencv_onnx::NodeProto node_proto = node_proto_;
1589 LayerParams gruParams = layerParams;
1590 gruParams.name += "/gru";
1592 // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#
1593 CV_Assert(node_proto.input_size() == 6);
1594 Mat Wx = getBlob(node_proto, 1);
1595 Mat Wh = getBlob(node_proto, 2);
1596 Mat b = getBlob(node_proto, 3);
1597 Mat h0 = getBlob(node_proto, 5);
1599 Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
1600 Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
1601 h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
1602 b = b.reshape(1, b.size[0]);
1604 gruParams.blobs.resize(4);
1605 gruParams.blobs[0] = Wh;
1606 gruParams.blobs[1] = Wx;
1607 gruParams.blobs[2] = b;
1608 gruParams.blobs[3] = h0;
1609 gruParams.set("bidirectional", gruParams.get<String>("direction", "") == "bidirectional");
1611 node_proto.set_output(0, gruParams.name); // set different name so output shapes will be registered on that name
1612 addLayer(gruParams, node_proto);
1614 MatShape gruShape = outShapes[node_proto.output(0)];
1616 // Insert a dummy dimension of size 1 to mirror the extra axis in the ONNX GRU output shape
1617 gruShape.insert(gruShape.begin() + 1, 1);
1619 layerParams.type = "Reshape";
1620 layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size()));
1621 node_proto.set_input(0, gruParams.name); // redirect input to GRU
1622 node_proto.set_output(0, layerParams.name); // keep origin GRU's name
1623 addLayer(layerParams, node_proto);
1626 void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1628 const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
1629 layerParams.erase("scale");
1631 if (layerParams.has("bias"))
1633 layerParams.type = "Scale";
1634 layerParams.blobs.push_back(
1635 Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale));
1637 layerParams.set("bias_term", true);
1638 Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
1639 for (int j = 0; j < bias.total(); j++) {
1640 bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
1642 layerParams.blobs.push_back(bias);
1643 layerParams.erase("bias");
1646 layerParams.set("scale", scale);
1647 layerParams.type = "Power";
1649 addLayer(layerParams, node_proto);
1652 void ONNXImporter::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1654 CV_CheckEQ(node_proto.input_size(), 1, "");
1655 layerParams.type = "ReLU6";
1656 layerParams.set("min_value", layerParams.get<float>("min", -FLT_MAX));
1657 layerParams.set("max_value", layerParams.get<float>("max", FLT_MAX));
1658 addLayer(layerParams, node_proto);
1661 void ONNXImporter::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1663 layerParams.type = "ReLU";
1664 layerParams.set("negative_slope", layerParams.get<float>("alpha", 0.01));
1665 addLayer(layerParams, node_proto);
1668 void ONNXImporter::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1670 layerParams.type = "ReLU";
1671 addLayer(layerParams, node_proto);
1674 void ONNXImporter::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1676 layerParams.type = "ELU";
1677 addLayer(layerParams, node_proto);
1680 void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1682 layerParams.type = "TanH";
1683 addLayer(layerParams, node_proto);
1686 void ONNXImporter::parseAbs(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1688 layerParams.type = "AbsVal";
1689 addLayer(layerParams, node_proto);
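// Equal/Greater/Less are mapped to the Compare layer; a constant operand, if any, is flattened and
// stored as a blob, and the comparison mode is chosen from the ONNX op type.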
1692 void ONNXImporter::parseCompare(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1694 CV_Assert(node_proto.input_size() == 2);
1695 const std::string& layer_type = node_proto.op_type();
1697 bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
1698 bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
1700 if (is_const_0 || is_const_1)
1702 Mat blob = getBlob(node_proto, static_cast<int>(is_const_1));
1703 blob = blob.reshape(1, 1);
1704 layerParams.blobs.push_back(blob);
1707 layerParams.type = "Compare";
1709 if (layer_type == "Equal")
1710 layerParams.set("mode", "equal");
1711 else if (layer_type == "Greater")
1712 layerParams.set("mode", "greater");
1714 layerParams.set("mode", "less");
1715 addLayer(layerParams, node_proto);
1718 void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1720 layerParams.type = "PReLU";
1721 layerParams.blobs.push_back(getBlob(node_proto, 1));
1722 addLayer(layerParams, node_proto);
1725 void ONNXImporter::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1727 replaceLayerParam(layerParams, "size", "local_size");
1728 addLayer(layerParams, node_proto);
1731 void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1733 opencv_onnx::NodeProto node_proto = node_proto_;
1734 if (node_proto.input_size() != 3)
1735 CV_Error(Error::StsNotImplemented,
1736 "Expected input, scale, bias");
1738 layerParams.blobs.resize(4);
1739 layerParams.blobs[2] = getBlob(node_proto, 1); // weightData
1740 layerParams.blobs[3] = getBlob(node_proto, 2); // biasData
1741 layerParams.set("has_bias", true);
1742 layerParams.set("has_weight", true);
1744 // Get number of channels in input
1745 int size = layerParams.blobs[2].total();
1746 layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean
1747 layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std
1749 LayerParams mvnParams;
1750 mvnParams.name = layerParams.name + "/MVN";
1751 mvnParams.type = "MVN";
1752 mvnParams.set("eps", layerParams.get<float>("epsilon"));
1753 layerParams.erase("epsilon");
1756 int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams);
1758 IterLayerId_t layerId = layer_id.find(node_proto.input(0));
1759 CV_Assert(layerId != layer_id.end());
1760 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
1762 layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0)));
1763 outShapes[mvnParams.name] = outShapes[node_proto.input(0)];
1765 // Redirect the BatchNorm input to the MVN layer
1766 node_proto.set_input(0, mvnParams.name);
1767 layerParams.type = "BatchNorm";
1768 addLayer(layerParams, node_proto);
1771 void ONNXImporter::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1773 if (node_proto.input_size() != 5)
1774 CV_Error(Error::StsNotImplemented,
1775 "Expected input, scale, bias, mean and var");
1777 layerParams.type = "BatchNorm";
1778 replaceLayerParam(layerParams, "epsilon", "eps");
1779 replaceLayerParam(layerParams, "spatial", "use_global_stats");
1781 Mat meanData = getBlob(node_proto, 3);
1782 Mat stdData = getBlob(node_proto, 4);
1784 layerParams.blobs.push_back(meanData);
1785 layerParams.blobs.push_back(stdData);
1787 if (!node_proto.input(1).empty()) {
1788 layerParams.set("has_weight", true);
1789 layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData
1791 layerParams.set("has_weight", false);
1794 if (!node_proto.input(2).empty()) {
1795 layerParams.set("has_bias", true);
1796 layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData
1798 layerParams.set("has_bias", false);
1800 addLayer(layerParams, node_proto);
1803 void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1805 CV_Assert(node_proto.input_size() >= 2);
1806 layerParams.type = "InnerProduct";
1807 Mat weights = getBlob(node_proto, 1);
1808 int ind_num_out = 0;
1809 if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
1810 transpose(weights, weights);
1813 layerParams.blobs.push_back(weights);
1815 if (node_proto.input_size() == 3) {
1816 Mat bias = getBlob(node_proto, 2);
1817 layerParams.blobs.push_back(bias);
1819 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
1821 Mat inputBuf = getBlob(node_proto, 0);
1823 LayerParams constParams;
1824 constParams.name = node_proto.input(0);
1825 constParams.type = "Const";
1826 constParams.blobs.push_back(inputBuf);
1828 opencv_onnx::NodeProto proto;
1829 proto.add_output(constParams.name);
1830 addLayer(constParams, proto);
1833 layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
1834 layerParams.set("bias_term", node_proto.input_size() == 3);
1835 addLayer(layerParams, node_proto);
1838 void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1840 CV_Assert(node_proto.input_size() == 2);
1841 layerParams.type = "InnerProduct";
1842 layerParams.set("bias_term", false);
1843 CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
1844 int firstInpDims = outShapes[node_proto.input(0)].size();
1847 if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
1849 Mat blob = getBlob(node_proto, 1);
1850 secondInpDims = blob.dims;
1851 layerParams.blobs.push_back(blob.t());
1852 layerParams.set("num_output", layerParams.blobs[0].size[0]);
1854 secondInpDims = outShapes[node_proto.input(1)].size();
1856 layerParams.set("axis", firstInpDims - secondInpDims + 1);
1857 addLayer(layerParams, node_proto);
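// Helper for parseMul: finds the first non-unit axis of the broadcast shape and, if exactly one unit
// dimension lies between the first and last non-unit axes, reports its index in outShape via broadAxis
// (otherwise broadAxis stays -1); the surrounding unit dimensions are handled by the Scale layer's axis.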
1860 void findBroadAxis(const MatShape& broadShape, const MatShape& outShape, size_t& axis, int& broadAxis)
1862 const size_t diff = outShape.size() - broadShape.size();
1864 // find the first non-one element of the broadcasting shape
1866 for (; axis < broadShape.size() && broadShape[axis] == 1; ++axis) {}
1868 // find the last non-one element of the broadcasting shape
1869 size_t endAxis = broadShape.size();
1870 for (; endAxis > axis && broadShape[endAxis - 1] == 1; --endAxis) {}
1872 // find a dimension equal to 1 between axis and endAxis - it has to be broadcast;
1873 // dimensions to the left of axis and to the right of endAxis are handled by the Scale layer
1875 for (size_t i = axis; i < endAxis; ++i)
1877 size_t outAxis = i + diff;
1878 if (outShape[outAxis] == broadShape[i])
1883 // ensure we need to broadcast only 1 dimension in the middle
1884 CV_Assert(broadShape[i] == 1 && broadAxis == -1);
1885 broadAxis = static_cast<int>(outAxis);
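// Mul/Div: a scalar constant operand becomes a Power layer (with power=-1 for const/x), a constant
// blob becomes a Scale layer, two constant inputs are folded at import time, equal variable shapes
// use Eltwise, and the remaining broadcast cases fall back to Scale (a Power(-1) layer is inserted
// for division by a variable input).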
1892 void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1894 opencv_onnx::NodeProto node_proto = node_proto_;
1895 const std::string& layer_type = node_proto.op_type();
1896 CV_Assert(node_proto.input_size() == 2);
1898 bool isDiv = layer_type == "Div";
1900 bool haveVariables = false;
1901 for (int i = 0; i < 2; ++i)
1903 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
1906 haveVariables = true;
1908 if (constId != -1 && haveVariables)
1910 Mat blob = getBlob(node_proto, constId);
1911 blob = blob.reshape(1, 1);
1912 if (blob.total() == 1) {
1913 float blob_value = blob.ptr<float>()[0];
1914 float coeff = blob_value;
1917 coeff = 1.f / blob_value;
1920 // Power layer calculates (x*scale + shift)^power, so const/x -> (x * (1/const) + 0)^(-1)
1921 layerParams.set("power", -1.f);
1924 layerParams.set("scale", coeff);
1925 layerParams.type = "Power";
1929 divide(1.0, blob, blob);
1930 layerParams.blobs.push_back(blob);
1931 layerParams.type = "Scale";
1934 else if (!haveVariables)
1936 Mat inp0 = getBlob(node_proto, 0);
1937 Mat inp1 = getBlob(node_proto, 1);
1939 if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1))
1940 CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str()));
1942 if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims)
1944 if (inp0.dims < inp1.dims)
1946 inp0 = inp0.reshape(1, inp1.dims, inp1.size);
1947 inp0.dims = inp1.dims;
1951 inp1 = inp1.reshape(1, inp0.dims, inp0.size);
1952 inp1.dims = inp0.dims;
1957 if (inp0.total() != inp1.total())
1959 if (inp0.total() == 1)
1961 float inp0_value = inp0.ptr<float>()[0];
1962 float coeff = isDiv ? 1.0 / inp0_value : inp0_value;
1963 multiply(inp1, coeff, out);
1967 float inp1_value = inp1.ptr<float>()[0];
1968 float coeff = isDiv ? 1.0 / inp1_value : inp1_value;
1969 multiply(inp0, coeff, out);
1975 out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
1978 if (inp0.dims == 1 && inp1.dims == 1)
1979 out.dims = 1; // to workaround dims == 1
1980 addConstant(layerParams.name, out);
1983 else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
1985 layerParams.type = "Eltwise";
1986 layerParams.set("operation", isDiv ? "div" : "prod");
1990 // The Scale layer allocates its output with the shape of the first input
1991 if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)]))
1993 opencv_onnx::NodeProto proto;
1994 proto.add_input(node_proto.input(1));
1995 proto.add_input(node_proto.input(0));
1996 proto.add_output(layerParams.name);
2002 LayerParams powerParams;
2003 powerParams.name = layerParams.name + "/inv";
2004 powerParams.type = "Power";
2005 powerParams.set("power", -1);
2007 //Create Power layer
2008 int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
2010 IterLayerId_t layerId = layer_id.find(node_proto.input(1));
2011 CV_Assert(layerId != layer_id.end());
2012 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
2014 layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
2015 outShapes[powerParams.name] = outShapes[node_proto.input(1)];
2017 // Redirect input 1 to the inserted Power layer
2018 node_proto.set_input(1, powerParams.name);
2021 const MatShape& broadShape = outShapes[node_proto.input(1)];
2022 const MatShape& outShape = outShapes[node_proto.input(0)];
2026 findBroadAxis(broadShape, outShape, axis, broadAxis);
2028 // if there is a dimension of size 1 in the middle that has to be broadcast, broadcast it
2029 if (broadAxis != -1)
2031 opencv_onnx::NodeProto concat_node_proto = node_proto;
2032 const std::string& input1 = concat_node_proto.input(1);
2034 expandMid(layerParams.name, concat_node_proto, input1, outShape[broadAxis]);
2036 LayerParams concatLP;
2037 concatLP.name = layerParams.name + "/concat";
2038 concatLP.set("axis", broadAxis);
2039 concatLP.type = "Concat";
2040 concat_node_proto.set_output(0, concatLP.name);
2042 addLayer(concatLP, concat_node_proto);
2043 node_proto.set_input(1, concatLP.name);
2046 CV_Assert(axis != outShape.size());
2047 layerParams.set("axis", static_cast<int>(axis));
2048 layerParams.type = "Scale";
2050 addLayer(layerParams, node_proto);
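// Conv: constant weights/bias are attached as blobs; asymmetric spatial padding is not supported by
// the Convolution layer directly, so it is split out into a preceding Padding layer.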
2053 void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2055 opencv_onnx::NodeProto node_proto = node_proto_;
2056 CV_Assert(node_proto.input_size() >= 2);
2057 layerParams.type = "Convolution";
2058 for (int j = 1; j < node_proto.input_size(); j++) {
2059 if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
2061 layerParams.blobs.push_back(getBlob(node_proto, j));
2064 int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
2065 layerParams.set("num_output", outCn);
2067 // Check for asymmetric padding in Conv2D
2068 if (layerParams.has("pad"))
2070 bool asymmetricPadding = false;
2071 DictValue pads = layerParams.get("pad");
2072 const int dims = pads.size() / 2;
2073 for (int i = 0; i < dims; ++i)
2075 if (pads.get<int>(i) != pads.get<int>(i + dims))
2077 asymmetricPadding = true;
2081 if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r]
2083 layerParams.erase("pad");
2084 // No padding required for the N and C axes
2085 std::vector<int> paddings(4, 0);
2086 // Add padding for the H and W axes
2087 for (int i = 0; i < dims; ++i)
2089 paddings.push_back(pads.get<int>(i));
2090 paddings.push_back(pads.get<int>(dims + i));
2093 padLp.name = layerParams.name + "/pad";
2094 padLp.type = "Padding";
2095 padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
2097 opencv_onnx::NodeProto proto;
2098 proto.add_input(node_proto.input(0));
2099 proto.add_output(padLp.name);
2101 addLayer(padLp, proto);
2102 node_proto.set_input(0, padLp.name);
2105 addLayer(layerParams, node_proto);
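// ConvTranspose: when 'output_shape' is present the output adjustment ('adj') is derived from the
// target size, stride and kernel size (honouring the SAME/VALID pad_mode); otherwise 'output_padding'
// is forwarded as 'adj'.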
2108 void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2110 CV_Assert(node_proto.input_size() >= 2);
2111 layerParams.type = "Deconvolution";
2112 for (int j = 1; j < node_proto.input_size(); j++) {
2113 layerParams.blobs.push_back(getBlob(node_proto, j));
2115 layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
2116 layerParams.set("bias_term", node_proto.input_size() == 3);
2118 if (!layerParams.has("kernel_size"))
2119 CV_Error(Error::StsNotImplemented,
2120 "Required attribute 'kernel_size' is not present.");
2122 if (layerParams.has("output_shape"))
2124 const DictValue& outShape = layerParams.get("output_shape");
2125 DictValue strides = layerParams.get("stride");
2126 DictValue kernel = layerParams.get("kernel_size");
2129 std::vector<int> adjust_pads;
2130 if (layerParams.has("pad_mode"))
2132 padMode = toUpperCase(layerParams.get<String>("pad_mode"));
2133 if (padMode != "SAME" && padMode != "VALID")
2134 CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
2136 for (int i = 0; i < strides.size(); i++)
2138 int sz = outShape.get<int>(2 + i);
2139 int stride = strides.get<int>(i);
2140 adjust_pads.push_back(padMode == "SAME"? (sz - 1) % stride :
2141 (sz - kernel.get<int>(i)) % stride);
2143 layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
2146 else if (layerParams.has("output_padding"))
2148 replaceLayerParam(layerParams, "output_padding", "adj");
2150 addLayer(layerParams, node_proto);
2153 void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2155 int depth = layerParams.get<int>("depth", CV_32F);
2156 layerParams.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
2157 replaceLayerParam(layerParams, "perm", "order");
2158 if (!layerParams.has("order")) {
2159 MatShape inpShape = outShapes[node_proto.input(0)];
2160 size_t dims = inpShape.size();
2161 std::vector<int> perm(dims);
2162 for (size_t d = 0; d < dims; ++d)
2164 perm[d] = static_cast<int>(dims - 1 - d);
2166 layerParams.set("order", DictValue::arrayInt(perm.data(), perm.size()));
2169 CV_Assert(node_proto.input_size() == 1);
2170 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2172 std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
2173 runLayer(layerParams, inputs, transposed);
2174 CV_Assert(transposed.size() == 1);
2175 addConstant(layerParams.name, transposed[0]);
2178 addLayer(layerParams, node_proto);
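// Squeeze: only the listed axes that actually have size 1 are removed; the result is expressed as a
// Reshape to the reduced shape (or Identity when nothing changes), and constant inputs are reshaped
// at import time.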
2181 void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2183 CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
2184 DictValue axes_dict = layerParams.get("axes");
2185 MatShape inpShape = outShapes[node_proto.input(0)];
2187 std::vector<bool> maskedAxes(inpShape.size(), false);
2188 for (int i = 0; i < axes_dict.size(); ++i)
2190 int axis = axes_dict.getIntValue(i);
2191 CV_CheckLE(axis, static_cast<int>(inpShape.size()), "Squeeze axis");
2192 maskedAxes[axis] = inpShape[axis] == 1;
2195 for (int i = 0; i < inpShape.size(); ++i)
2198 outShape.push_back(inpShape[i]);
2200 if (outShape.size() != inpShape.size())
2202 layerParams.type = "Reshape";
2203 layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
2204 if (hasDynamicShapes)
2206 std::vector<int> dynamicAxes;
2207 std::vector<int> inputIndices;
2208 for (int index = 0; index < inpShape.size(); ++index)
2210 if (!maskedAxes[index])
2211 inputIndices.push_back(index);
2213 for (int index = 0; index < outShape.size(); ++index)
2214 dynamicAxes.push_back(index);
2215 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2216 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2220 layerParams.type = "Identity";
2222 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2224 Mat inp = getBlob(node_proto, 0);
2225 Mat out = inp.reshape(1, outShape);
2226 out.dims = outShape.size(); // to workaround dims == 1
2227 addConstant(layerParams.name, out);
2230 int depth = layerParams.get<int>("depth", CV_32F);
2231 layerParams.type += (depth == CV_8S) ? "Int8" : "";
2232 addLayer(layerParams, node_proto);
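// Flatten: constant inputs are flattened to 2-D immediately; for variable inputs the importer emits a
// Flatten that collapses dims [0, axis-1] followed by a Flatten with axis=1, inserting an auxiliary
// Reshape when the normalized axis is 0 or equals the input rank.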
2235 void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2237 opencv_onnx::NodeProto node_proto = node_proto_;
2238 CV_CheckEQ(node_proto.input_size(), 1, "");
2239 int axis_ = layerParams.get<int>("axis", 1);
2240 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2242 Mat input = getBlob(node_proto, 0);
2243 int axis = normalize_axis(axis_, input.dims);
2245 int out_size[2] = {1, 1};
2246 for (int i = 0; i < axis; ++i)
2248 out_size[0] *= input.size[i];
2250 for (int i = axis; i < input.dims; ++i)
2252 out_size[1] *= input.size[i];
2255 Mat output = input.reshape(1, 2, out_size);
2256 addConstant(layerParams.name, output);
2259 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2260 CV_Assert(shapeIt != outShapes.end());
2261 MatShape inpShape = shapeIt->second;
2262 int axis = normalize_axis(axis_, inpShape.size());
2264 if (axis == 0 || axis == inpShape.size())
2266 LayerParams reshapeLp;
2267 reshapeLp.name = layerParams.name + "/reshape";
2268 reshapeLp.type = "Reshape";
2269 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
2271 inpShape.insert(axis == 0 ? inpShape.begin() : inpShape.end(), 1);
2272 reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2274 opencv_onnx::NodeProto proto;
2275 proto.add_input(node_proto.input(0));
2276 proto.add_output(reshapeLp.name);
2277 addLayer(reshapeLp, proto);
2278 node_proto.set_input(0, reshapeLp.name);
2282 LayerParams first_pass;
2283 first_pass.name = layerParams.name + "/flatten";
2284 CV_Assert(layer_id.find(first_pass.name) == layer_id.end());
2285 first_pass.type = "Flatten";
2286 first_pass.set("axis", 0);
2287 first_pass.set("end_axis", axis - 1);
2289 opencv_onnx::NodeProto proto;
2290 proto.add_input(node_proto.input(0));
2291 proto.add_output(first_pass.name);
2292 addLayer(first_pass, proto);
2294 layerParams.set("axis", 1);
2295 node_proto.set_input(0, first_pass.name);
2296 addLayer(layerParams, node_proto);
2299 void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2301 CV_Assert(node_proto.input_size() == 1 || node_proto.input_size() == 2);
2303 if (node_proto.input_size() == 2)
2305 Mat blob = getBlob(node_proto, 1);
2306 axes = DictValue::arrayInt(blob.ptr<int>(), blob.total());
2309 axes = layerParams.get("axes");
2311 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2314 Mat input = getBlob(node_proto, 0);
2316 std::vector<int> dims;
2317 for (int j = 0; j < input.dims; j++) {
2318 dims.push_back(input.size[j]);
2320 CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
2321 for (int j = 0; j < axes.size(); j++) {
2322 const int idx = axes.getIntValue(j);
2323 CV_Assert(idx <= dims.size());
2324 dims.insert(dims.begin() + idx, 1);
2327 Mat out = input.reshape(0, dims);
2328 addConstant(layerParams.name, out);
2333 if (axes.size() != 1)
2334 CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");
2336 int depth = layerParams.get<int>("depth", CV_32F);
2338 MatShape inpShape = outShapes[node_proto.input(0)];
2339 int axis = axes.getIntValue(0);
2340 CV_Assert(0 <= axis && axis <= inpShape.size());
2341 std::vector<int> outShape = inpShape;
2342 outShape.insert(outShape.begin() + axis, 1);
2343 layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape";
2344 layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
2345 if (hasDynamicShapes)
2347 std::vector<int> dynamicAxes;
2348 std::vector<int> inputIndices;
2349 for (int index = 0; index < outShape.size(); ++index) {
2351 dynamicAxes.push_back(index);
2353 for (int index = 0; index < inpShape.size(); ++index)
2354 inputIndices.push_back(index);
2355 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2356 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2358 addLayer(layerParams, node_proto);
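// Expand: constant inputs are replicated with cv::repeat; variable inputs are emulated either with a
// Scale layer against an all-ones constant (for trailing broadcast axes) or by concatenating
// replicated copies along the single broadcast axis.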
2361 void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2363 opencv_onnx::NodeProto node_proto = node_proto_;
2364 CV_CheckEQ(node_proto.input_size(), 2, "");
2365 const std::string& input0 = node_proto.input(0);
2366 const std::string& input1 = node_proto.input(1);
2367 Mat newShapeMat = getBlob(input1);
2368 MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
2371 bool haveVariables = constBlobs.find(input0) == constBlobs.end();
2374 IterShape_t shapeIt = outShapes.find(input0);
2375 CV_Assert(shapeIt != outShapes.end());
2376 inpShape = shapeIt->second;
2380 inpShape = shape(getBlob(input0));
2383 String srcName = input0;
2384 // Unsqueeze and repeat along new axis
2385 if (targetShape.size() == inpShape.size() + 1)
2387 inpShape.insert(inpShape.begin(), targetShape.size() - inpShape.size(), 1);
2388 for (int i = 0; i < targetShape.size(); i++)
2390 if (abs(targetShape[i]) == 1)
2391 targetShape[i] = inpShape[i];
2395 LayerParams reshapeLp;
2396 reshapeLp.name = layerParams.name + "/reshape";
2397 reshapeLp.type = "Reshape";
2398 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
2399 reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2401 opencv_onnx::NodeProto proto;
2402 proto.add_input(node_proto.input(0));
2403 proto.add_output(reshapeLp.name);
2404 addLayer(reshapeLp, proto);
2405 srcName = reshapeLp.name;
2408 CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
2410 std::vector<int> broadcast_axes;
2411 // shapes aren't right-aligned here because targetShape.size() == inpShape.size()
2412 for (int i = 0; i < targetShape.size(); i++)
2414 if (targetShape[i] != inpShape[i])
2416 if (inpShape[i] == 1)
2418 broadcast_axes.push_back(i);
2420 else if (targetShape[i] != 1)
2422 CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
2429 if (broadcast_axes.size() != 1)
2430 CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");
2432 Mat input = getBlob(node_proto, 0);
2433 input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
2434 Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
2435 output = output.reshape(0, targetShape);
2436 addConstant(layerParams.name, output);
2440 if (broadcast_axes.size() == 2 &&
2441 broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
2443 LayerParams constParams;
2444 constParams.name = layerParams.name + "/const";
2445 CV_Assert(layer_id.find(constParams.name) == layer_id.end());
2446 constParams.type = "Const";
2448 Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F);
2449 constParams.blobs.push_back(inp);
2451 opencv_onnx::NodeProto proto;
2452 proto.add_output(constParams.name);
2453 addLayer(constParams, proto);
2455 layerParams.type = "Scale";
2456 layerParams.set("bias_term", false);
2457 node_proto.set_input(0, constParams.name);
2458 node_proto.set_input(1, srcName);
2460 else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
2462 expandMid(layerParams.name, node_proto, srcName, targetShape[broadcast_axes[0]]);
2464 layerParams.set("axis", broadcast_axes[0]);
2465 layerParams.type = "Concat";
2466 node_proto.set_output(0, layerParams.name);
2468 else if (broadcast_axes.empty())
2470 layerParams.type = "Identity";
2473 CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
2474 addLayer(layerParams, node_proto);
2477 void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2479 CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
2480 int depth = layerParams.get<int>("depth", CV_32F);
2481 layerParams.type += (depth == CV_8S) ? "Int8" : "";
2483 if (node_proto.input_size() == 2) {
2484 Mat blob = getBlob(node_proto, 1);
2485 CV_Assert(blob.type() == CV_32SC1);
2487 layerParams.set("dim", DictValue::arrayInt<int*>(blob.ptr<int>(), blob.total()));
2489 if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
2490 std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
2491 runLayer(layerParams, inputs, outputs);
2492 addConstant(layerParams.name, outputs[0]);
2497 DictValue shape = layerParams.get("shape");
2498 std::vector<int> dim;
2499 for (int j = 0; j < shape.size(); j++) {
2500 dim.push_back(shape.getIntValue(j));
2503 if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
2504 Mat input = getBlob(node_proto, 0);
2505 Mat out = input.reshape(0, dim);
2506 addConstant(layerParams.name, out);
2509 replaceLayerParam(layerParams, "shape", "dim");
2511 addLayer(layerParams, node_proto);
2514 void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2516 int depth = layerParams.get<int>("depth", CV_32F);
2517 layerParams.type = (depth == CV_8S) ? "PaddingInt8" : "Padding";
2518 replaceLayerParam(layerParams, "mode", "type");
2519 if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
2521 // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
2522 // We need to shuffle it to begin0, end0, begin1, end1, ...
2523 Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
2524 paddings = paddings.t();
2525 layerParams.set("paddings", DictValue::arrayInt(paddings.ptr<int>(), paddings.total()));
2527 if (node_proto.input_size() == 3)
2529 Mat value = getBlob(node_proto, 2);
2530 float padValue = (depth == CV_8S) ? (float)value.ptr<int8_t>()[0] : value.ptr<float>()[0];
2531 layerParams.set("value", padValue);
2534 addLayer(layerParams, node_proto);
2537 void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2539 CV_Assert(node_proto.input_size() == 1);
2540 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2541 CV_Assert(shapeIt != outShapes.end());
2542 const MatShape& inpShape = shapeIt->second;
2544 int dims = static_cast<int>(inpShape.size());
2545 Mat shapeMat(dims, 1, CV_32S);
2546 bool isDynamicShape = false;
2547 for (int j = 0; j < dims; ++j)
2549 int sz = inpShape[j];
2550 isDynamicShape |= (sz == 0);
2551 shapeMat.at<int>(j) = sz;
2553 shapeMat.dims = 1; // FIXIT Mat 1D
2557 CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
2558 // FIXIT repair assertion
2559 // Disabled to pass face detector tests from #20422
2560 // CV_Assert(!isDynamicShape); // not supported
2562 addConstant(layerParams.name, shapeMat);
2565 void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2567 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2569 Mat blob = getBlob(node_proto, 0);
2571 switch (layerParams.get<int>("to"))
2573 case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break;
2574 case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break;
2575 case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break;
2576 case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
2577 case opencv_onnx::TensorProto_DataType_INT8:
2578 case opencv_onnx::TensorProto_DataType_INT16:
2579 case opencv_onnx::TensorProto_DataType_INT32:
2580 case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break;
2581 default: type = blob.type();
2584 blob.convertTo(dst, type);
2585 dst.dims = blob.dims;
2586 addConstant(layerParams.name, dst);
2590 layerParams.type = "Identity";
2591 addLayer(layerParams, node_proto);
2594 void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2598 if (!layerParams.blobs.empty())
2600 CV_Assert(!layerParams.has("value"));
2601 depth = layerParams.blobs[0].depth();
2603 layerParams.blobs[0].convertTo(floats, CV_32F);
2604 fill_value = floats.at<float>(0, 0);
2607 fill_value = layerParams.get("value", 0);
2609 MatShape inpShape = getBlob(node_proto, 0);
2610 for (int i = 0; i < inpShape.size(); i++)
2611 CV_CheckGT(inpShape[i], 0, "");
2612 Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
2613 addConstant(layerParams.name, tensor);
2616 void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2618 opencv_onnx::NodeProto node_proto = node_proto_;
2619 CV_Assert(node_proto.input_size() == 2);
2620 Mat indexMat = getBlob(node_proto, 1);
2621 CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
2622 int index = indexMat.at<int>(0);
2623 int axis = layerParams.get<int>("axis", 0);
2625 if ((constBlobs.find(node_proto.input(0)) != constBlobs.end()))
2627 Mat input = getBlob(node_proto, 0);
2629 std::vector<cv::Range> ranges(input.dims, Range::all());
2630 ranges[axis] = Range(index, index + 1);
2632 out = input(ranges);
2633 MatShape outShape = shape(out);
2634 if (outShape.size() > 1)
2636 outShape.erase(outShape.begin() + axis);
2637 out.reshape(0, outShape);
2641 addConstant(layerParams.name, out);
2646 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2647 CV_Assert(shapeIt != outShapes.end());
2648 MatShape inpShape = shapeIt->second;
2650 LayerParams sliceLp;
2651 sliceLp.type = "Slice";
2652 sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name;
2653 std::vector<int> begin(inpShape.size(), 0);
2654 std::vector<int> end(inpShape.size(), -1);
2655 begin[axis] = index;
2656 end[axis] = index + 1;
2658 cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size());
2659 cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
2660 sliceLp.set("begin", paramBegin);
2661 sliceLp.set("end", paramEnd);
2662 sliceLp.set("has_dynamic_shapes", hasDynamicShapes);
2664 if (inpShape.size() > 1)
2666 opencv_onnx::NodeProto proto;
2667 proto.add_input(node_proto.input(0));
2668 proto.add_output(sliceLp.name);
2669 addLayer(sliceLp, proto);
2671 inpShape.erase(inpShape.begin() + axis);
2672 layerParams.type = "Reshape";
2673 layerParams.set("axis", 0);
2674 layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2675 if (hasDynamicShapes)
2677 std::vector<int> dynamicAxes;
2678 std::vector<int> inputIndices;
2679 for (int index = 0; index < inpShape.size(); ++index)
2680 dynamicAxes.push_back(index);
2681 for (int index = 0; index < inpShape.size(); ++index)
2682 inputIndices.push_back(index);
2683 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2684 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2686 node_proto.set_input(0, sliceLp.name);
2690 layerParams = sliceLp;
2693 addLayer(layerParams, node_proto);
2696 void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2698 bool hasVariableInps = false;
2699 for (int i = 0; i < node_proto.input_size(); ++i)
2701 if (layer_id.find(node_proto.input(i)) != layer_id.end())
2703 hasVariableInps = true;
2708 if (!hasVariableInps)
2710 std::vector<Mat> inputs(node_proto.input_size()), concatenated;
2711 // Due to constant folding we can get inputs with different numbers of dimensions
2712 // Insert the missing dimensions into the inputs
2713 MatShape inputShape;
2714 for (size_t i = 0; i < inputs.size(); ++i)
2716 inputs[i] = getBlob(node_proto, i);
2717 if (inputs[i].size.dims() > inputShape.size())
2719 inputShape = shape(inputs[i]);
2723 // Concat-1 uses a default axis of 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
2724 int axis = layerParams.get<int>("axis", 1);
2725 for (size_t i = 0; i < inputs.size(); ++i)
2727 MatShape targetShape = inputShape;
2728 targetShape[axis] = shape(inputs[i])[axis];
2729 CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
2730 inputs[i] = inputs[i].reshape(0, targetShape);
2732 runLayer(layerParams, inputs, concatenated);
2734 CV_Assert(concatenated.size() == 1);
2735 addConstant(layerParams.name, concatenated[0]);
2740 for (int i = 0; i < node_proto.input_size(); ++i)
2742 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
2744 LayerParams constParams;
2745 constParams.name = node_proto.input(i);
2746 constParams.type = "Const";
2747 constParams.blobs.push_back(getBlob(node_proto, i));
2749 opencv_onnx::NodeProto proto;
2750 proto.add_output(constParams.name);
2751 addLayer(constParams, proto);
2755 addLayer(layerParams, node_proto);
2758 // https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
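// Resize: zoom factors are taken from the constant 'scales' input when it is non-empty; otherwise the
// destination 'sizes' input (opset 11/13) supplies the target width/height. Non-constant scale/size
// inputs are rejected.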
2759 void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2761 for (int i = 1; i < node_proto.input_size(); i++)
2762 CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
2764 int depth = layerParams.get<int>("depth", CV_32F);
2765 layerParams.type += (depth == CV_8S) ? "Int8" : "";
2767 if (layerParams.has("coordinate_transformation_mode"))
2769 String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
2770 CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
2772 layerParams.set("align_corners", interp_mode == "align_corners");
2773 if (layerParams.get<String>("mode") == "linear")
2775 layerParams.set("mode", interp_mode == "pytorch_half_pixel" || interp_mode == "half_pixel" ?
2776 "opencv_linear" : "bilinear");
2779 if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
2780 layerParams.set("mode", "opencv_linear");
2782 // opset-10: input = [X, scales]
2783 // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
2784 // opset-13: may have empty input, [X, "", "", sizes] or [x, "", scales]
2785 int scalesInputId = node_proto.input_size() == 2 ? 1 : 2;
2786 const std::string& scale_name = node_proto.input(scalesInputId);
2788 if(!scale_name.empty())
2789 scales = getBlob(node_proto, scalesInputId);
2791 if (!scales.empty())
2793 CV_CheckEQ(scales.total(), (size_t)4, "NCHW layout is expected");
2794 layerParams.set("zoom_factor_y", scales.at<float>(2));
2795 layerParams.set("zoom_factor_x", scales.at<float>(3));
2797 else if (node_proto.input_size() >= 4) // opset-11 [x, roi, scales, sizes] or opset-13: input = [X, "", "", sizes]
2799 const std::string& inputSizes = node_proto.input(3);
2800 if (constBlobs.find(inputSizes) != constBlobs.end())
2802 Mat shapes = getBlob(inputSizes);
2803 CV_CheckEQ(shapes.total(), (size_t)4, "NCHW layout is expected");
2804 CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
2805 if (shapes.depth() == CV_32F)
2806 shapes.convertTo(shapes, CV_32S);
2807 layerParams.set("width", shapes.at<int>(3));
2808 layerParams.set("height", shapes.at<int>(2));
2812 CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str()));
2817 CV_Error(Error::StsNotImplemented, "ONNX/Resize: found neither 'scales' nor destination 'sizes' parameters");
2819 replaceLayerParam(layerParams, "mode", "interpolation");
2820 addLayer(layerParams, node_proto);
2823 void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2825 // fused from a Resize subgraph
2826 if (layerParams.has("coordinate_transformation_mode"))
2828 String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
2829 CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
2831 layerParams.set("align_corners", interp_mode == "align_corners");
2832 if (layerParams.get<String>("mode") == "linear")
2834 layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
2835 "opencv_linear" : "bilinear");
2838 if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
2839 layerParams.set("mode", "opencv_linear");
2841 layerParams.type = "Resize";
2842 if (layerParams.has("scales"))
2845 DictValue scales = layerParams.get("scales");
2846 CV_Assert(scales.size() == 4);
2847 layerParams.set("zoom_factor_y", scales.getIntValue(2));
2848 layerParams.set("zoom_factor_x", scales.getIntValue(3));
2850 else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
2853 replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
2854 replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
2859 const std::string& input1 = node_proto.input(1);
2860 if (constBlobs.find(input1) != constBlobs.end())
2862 Mat scales = getBlob(input1);
2863 CV_Assert(scales.total() == 4);
2864 layerParams.set("zoom_factor_y", scales.at<float>(2));
2865 layerParams.set("zoom_factor_x", scales.at<float>(3));
2868 replaceLayerParam(layerParams, "mode", "interpolation");
2869 addLayer(layerParams, node_proto);
2872 void ONNXImporter::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2874 const std::string& layer_type = node_proto.op_type();
2875 layerParams.type = "Softmax";
2876 layerParams.set("log_softmax", layer_type == "LogSoftmax");
2877 addLayer(layerParams, node_proto);
2880 void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2882 opencv_onnx::NodeProto node_proto = node_proto_;
2883 CV_CheckEQ(node_proto.input_size(), 3, "");
2884 if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
2886 Mat priors = getBlob(node_proto, 2);
2888 LayerParams constParams;
2889 constParams.name = layerParams.name + "/priors";
2890 constParams.type = "Const";
2891 constParams.blobs.push_back(priors);
2893 opencv_onnx::NodeProto priorsProto;
2894 priorsProto.add_output(constParams.name);
2895 addLayer(constParams, priorsProto);
2897 node_proto.set_input(2, constParams.name);
2899 addLayer(layerParams, node_proto);
2902 void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2904 layerParams.type = "CumSum";
2907 const std::string& input1 = node_proto.input(1);
2909 if (constBlobs.find(input1) != constBlobs.end())
2911 Mat axis_blob = getBlob(input1);
2912 CV_Assert(axis_blob.total() == 1u);
2913 layerParams.set("axis", axis_blob.at<int>(0));
2916 addLayer(layerParams, node_proto);
2919 void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2921 for (int j = 0; j < node_proto.input_size(); j++) {
2922 if (layer_id.find(node_proto.input(j)) == layer_id.end())
2923 layerParams.blobs.push_back(getBlob(node_proto, j));
2925 addLayer(layerParams, node_proto);
2928 void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2930 const std::string& name = layerParams.name;
2931 std::string& layer_type = layerParams.type;
2932 const std::string& layer_type_domain = node_proto.has_domain() ? node_proto.domain() : std::string();
2933 if (!layer_type_domain.empty() && layer_type_domain != str_domain_ai_onnx)
2935 // append ONNX domain name
2936 static bool DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME = utils::getConfigurationParameterBool("OPENCV_DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME", true);
2937 if (DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME)
2939 layer_type = layer_type_domain + "." + layer_type;
2943 CV_LOG_IF_INFO(NULL, !LayerFactory::isLayerRegistered(layer_type), "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
2944 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
2947 parseSimpleLayers(layerParams, node_proto);
2950 void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2952 CV_Assert(node_proto.input_size() == 3);
2953 layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
2955 if (node_proto.op_type() == "DequantizeLinear")
2957 Mat scale = getBlob(node_proto, 1);
2958 Mat zeropoint = getBlob(node_proto, 2);
2960 layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
2961 layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
2963 addLayer(layerParams, node_proto);
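// QLinearConv: the input zero point is folded into the bias, biasFused[i] = bias[i] - inp_zp * sum(W_i),
// and the per-output-channel requantization factor is inp_scale * w_scale[i] / out_scale.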
2966 void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2968 int ninputs = node_proto.input_size();
2969 CV_Assert(ninputs == 8 || ninputs == 9);
2971 Mat inp_sc = getBlob(node_proto, 1);
2972 Mat inp_zp = getBlob(node_proto, 2);
2974 Mat weights = getBlob(node_proto, 3);
2975 int outCn = weights.size[0];
2976 Mat w_scale = getBlob(node_proto, 4);
2977 CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
2978 Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
2980 Mat out_sc = getBlob(node_proto, 6);
2981 Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
2983 Mat weights_2d = weights.reshape(1, outCn);
2984 Mat biasFused(1, outCn, CV_32S);
2985 Mat outputMultiplier(1, outCn, CV_32F);
2986 for (int i = 0; i < outCn; i++)
2988 biasFused.at<int>(i) = bias.at<int>(i) - inp_zp.at<int8_t>(0)*(cv::sum(weights_2d.row(i))[0]);
2989 outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
2992 layerParams.type = "ConvolutionInt8";
2993 layerParams.set("num_output", outCn);
2994 layerParams.set("input_zeropoint", inp_zp.at<int8_t>(0));
2995 layerParams.blobs.push_back(weights);
2996 layerParams.blobs.push_back(biasFused);
2997 layerParams.blobs.push_back(outputMultiplier);
2998 addLayer(layerParams, node_proto);
3001 void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3003 int ninputs = node_proto.input_size();
3004 CV_Assert(ninputs == 8);
3006 if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
3007 CV_Error(Error::StsNotImplemented, "Variable weights are not supported");
3009 int firstInpDims = outShapes[node_proto.input(0)].size();
3011 Mat inp_sc = getBlob(node_proto, 1);
3012 Mat inp_zp = getBlob(node_proto, 2);
3014 Mat weights = getBlob(node_proto, 3).t();
3015 int outCn = weights.size[0];
3016 int secondInpDims = weights.dims;
3018 Mat w_scale = getBlob(node_proto, 4);
3019 CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
3020 Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
3021 Mat out_sc = getBlob(node_proto, 6);
3023 Mat bias(1, outCn, CV_32S);
3024 Mat outputMultiplier(1, outCn, CV_32F);
3025 for (int i = 0; i < outCn; i++)
3027 bias.at<int>(i) = -inp_zp.at<int8_t>(0)*(cv::sum(weights.row(i))[0]);
3028 outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
3031 layerParams.type = "InnerProductInt8";
3032 layerParams.set("num_output", outCn);
3033 layerParams.set("axis", firstInpDims - secondInpDims + 1);
3034 layerParams.blobs.push_back(weights);
3035 layerParams.blobs.push_back(bias);
3036 layerParams.blobs.push_back(outputMultiplier);
3037 addLayer(layerParams, node_proto);
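// QLinearAdd/QLinearMul: with two variable inputs of matching shape an EltwiseInt8 layer is emitted
// with coefficients derived from the input/output scales; a scalar constant is folded into a
// Requantize layer, a full-shape constant becomes a ConstInt8 input to EltwiseInt8, and broadcastable
// constants are dequantized into a ScaleInt8 blob.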
3040 void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3042 opencv_onnx::NodeProto node_proto = node_proto_;
3043 CV_Assert(node_proto.input_size() == 8);
3044 std::string op = (node_proto.op_type() == "QLinearAdd") ? "sum" : "prod";
3046 for (int i = 0; i < 4; i += 3)
3048 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3052 Mat inp_0_sc = getBlob(node_proto, 1);
3053 Mat inp_0_zp = getBlob(node_proto, 2);
3055 Mat inp_1_sc = getBlob(node_proto, 4);
3056 Mat inp_1_zp = getBlob(node_proto, 5);
3058 // Set 2nd input as the const input
3061 cv::swap(inp_0_sc, inp_1_sc);
3062 cv::swap(inp_0_zp, inp_1_zp);
3065 float out_sc = getBlob(node_proto, 6).at<float>(0);
3066 int8_t out_zp = getBlob(node_proto, 7).at<int8_t>(0);
3068 std::vector<float> inp_scales = {inp_0_sc.at<float>(0), inp_1_sc.at<float>(0)};
3069 std::vector<int8_t> inp_zps = {inp_0_zp.at<int8_t>(0), inp_1_zp.at<int8_t>(0)};
3071 std::vector<float> coeffs;
3075 coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc};
3076 offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1];
3080 coeffs = {inp_scales[0]/out_sc, inp_scales[1]};
3086 Mat blob = getBlob(node_proto, constId);
3087 if (blob.total() == 1)
3089 float val = inp_scales[1] * (blob.at<int8_t>(0) - inp_zps[1]);
3090 float scale = inp_scales[0] / out_sc;
3094 float shift = out_zp - scale*inp_zps[0];
3096 shift += (val/out_sc);
3098 LayerParams rescaleParams;
3099 rescaleParams.name = layerParams.name;
3100 rescaleParams.type = "Requantize";
3101 rescaleParams.set("depth", CV_8S);
3102 rescaleParams.set("scale", scale);
3103 rescaleParams.set("shift", shift);
3104 addLayer(rescaleParams, node_proto);
3109 MatShape inpShape = outShapes[node_proto.input(3 - constId)];
3113 if (shape(blob) == inpShape)
3115 LayerParams constParams;
3116 constParams.name = layerParams.name + "/const";
3117 constParams.type = "ConstInt8";
3118 constParams.set("depth", CV_8S);
3119 constParams.set("scales", DictValue::arrayReal(inp_1_sc.ptr<float>(), 1));
3120 constParams.set("zeropoints", DictValue::arrayInt(inp_1_zp.ptr<int8_t>(), 1));
3121 constParams.blobs.push_back(blob);
3123 int id = dstNet.addLayer(constParams.name, constParams.type, CV_8S, constParams);
3124 layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
3125 outShapes[constParams.name] = shape(blob);
3126 node_proto.set_input(constId, constParams.name);
3128 layerParams.type = "EltwiseInt8";
3129 layerParams.set("operation", op);
3130 layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
3131 layerParams.set("offset", offset);
3135 layerParams.type = "ScaleInt8";
3136 layerParams.set("bias_term", op == "sum");
3138 for (int i = 0; i < graph_proto.initializer_size(); i++)
3140 opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
3141 if (tensor_proto.name() == node_proto.input(constId))
3143 axis = inpShape.size() - tensor_proto.dims_size();
3147 layerParams.set("axis", axis);
3148 blob = blob.reshape(1, 1);
3149 Mat blob_dequantized;
3150 blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1]));
3151 layerParams.blobs.push_back(blob_dequantized);
3152 layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
3156 else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)])
3158 layerParams.type = "EltwiseInt8";
3159 layerParams.set("operation", op);
3160 layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
3161 layerParams.set("offset", offset);
3165 layerParams.type = "ScaleInt8";
3166 layerParams.set("bias_term", op == "sum");
3167 layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
3170 layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size()));
3171 addLayer(layerParams, node_proto);
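// QLinearLeakyRelu and QLinearSigmoid are implemented as 256-entry int8 lookup tables: every possible
// quantized input value is dequantized, passed through the activation and requantized.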
3174 void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3176 CV_Assert(node_proto.input_size() == 5);
3178 float slope = layerParams.get<float>("alpha");
3179 float inp_sc = getBlob(node_proto, 1).at<float>(0);
3180 int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3181 float out_sc = getBlob(node_proto, 3).at<float>(0);
3182 int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
3184 Mat lookUpTable(1, 256, CV_8S);
3185 int8_t* table = lookUpTable.ptr<int8_t>();
3186 for (int i = -128; i < 128; i++)
3188 float x = inp_sc*(i - inp_zp);
3189 float y = x >= 0.f ? x : slope*x;
3190 int quantized = out_zp + cvRound(y/out_sc);
3191 table[i+128] = saturate_cast<int8_t>(quantized);
3194 layerParams.type = "ReLUInt8";
3195 layerParams.blobs.push_back(lookUpTable);
3196 addLayer(layerParams, node_proto);
3199 void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3201 CV_Assert(node_proto.input_size() == 5);
3203 float inp_sc = getBlob(node_proto, 1).at<float>(0);
3204 int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3205 float out_sc = getBlob(node_proto, 3).at<float>(0);
3206 int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
3208 Mat lookUpTable(1, 256, CV_8S);
3209 int8_t* table = lookUpTable.ptr<int8_t>();
3210 for (int i = -128; i < 128; i++)
3212 float x = inp_sc*(i - inp_zp);
3213 float y = 1.f/(1.f + std::exp(-x));
3214 int quantized = out_zp + cvRound(y/out_sc);
3215 table[i+128] = saturate_cast<int8_t>(quantized);
3218 layerParams.type = "SigmoidInt8";
3219 layerParams.blobs.push_back(lookUpTable);
3220 addLayer(layerParams, node_proto);
3223 void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3225 CV_Assert(node_proto.input_size() == 5);
3226 float inp_sc = getBlob(node_proto, 1).at<float>(0);
3227 int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3228 float out_sc = getBlob(node_proto, 3).at<float>(0);
3230 layerParams.type = "PoolingInt8";
3231 layerParams.set("pool", "ave");
3232 layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool");
3233 layerParams.set("multiplier", inp_sc/out_sc);
3234 layerParams.set("input_zeropoint", inp_zp);
3235 addLayer(layerParams, node_proto);
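// QLinearConcat: inputs whose scale/zero point differ from the output's are requantized first
// (constant blobs in place, variable inputs via an inserted Requantize layer); fully constant
// concatenations are folded at import time.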
3238 void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3240 opencv_onnx::NodeProto node_proto = node_proto_;
3241 layerParams.type = "ConcatInt8";
3242 int num_inputs = node_proto.input_size();
3244 float out_scale = getBlob(node_proto, 0).at<float>(0);
3245 int out_zp = getBlob(node_proto, 1).at<int8_t>(0);
3247 for (int i = 2; i < num_inputs; i += 3)
3249 float inp_scale = getBlob(node_proto, i + 1).at<float>(0);
3250 int inp_zp = getBlob(node_proto, i + 2).at<int8_t>(0);
3252 if (inp_scale != out_scale || inp_zp != out_zp)
3254 float scale = inp_scale/out_scale;
3255 float shift = out_zp - scale*inp_zp;
3257 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3259 Mat blob = getBlob(node_proto, i);
3261 blob.convertTo(blob_rescaled, CV_8S, scale, shift);
3262 constBlobs[node_proto.input(i)] = blob_rescaled;
3266 LayerParams rescaleParams;
3267 rescaleParams.name = node_proto.input(i) + "/rescale";
3268 rescaleParams.type = "Requantize";
3269 rescaleParams.set("depth", CV_8S);
3270 rescaleParams.set("scale", scale);
3271 rescaleParams.set("shift", shift);
3273 opencv_onnx::NodeProto proto;
3274 proto.add_input(node_proto.input(i));
3275 proto.add_output(rescaleParams.name);
3276 addLayer(rescaleParams, proto);
3277 node_proto.set_input(i, rescaleParams.name);
3282 bool hasVariableInps = false;
3283 for (int i = 2; i < num_inputs; i += 3)
3285 if (layer_id.find(node_proto.input(i)) != layer_id.end())
3287 hasVariableInps = true;
3292 if (!hasVariableInps)
3294 std::vector<Mat> inputs, concatenated;
3295 MatShape inputShape;
3296 for (size_t i = 2; i < num_inputs; i += 3)
3298 Mat blob = getBlob(node_proto, i);
3299 if (blob.size.dims() > inputShape.size())
3301 inputShape = shape(blob);
3303 inputs.push_back(blob);
3306 int axis = layerParams.get<int>("axis", 1);
3307 for (size_t i = 0; i < inputs.size(); ++i)
3309 MatShape targetShape = inputShape;
3310 targetShape[axis] = shape(inputs[i])[axis];
3311 CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
3312 inputs[i] = inputs[i].reshape(0, targetShape);
3314 runLayer(layerParams, inputs, concatenated);
3315 CV_Assert(concatenated.size() == 1);
3316 addConstant(layerParams.name, concatenated[0]);
3321 for (int i = 2; i < num_inputs; i += 3)
3323 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3325 LayerParams constParams;
3326 constParams.name = node_proto.input(i);
3327 constParams.type = "ConstInt8";
3328 constParams.blobs.push_back(getBlob(node_proto, i));
3329 constParams.set("depth", CV_8S);
3331 opencv_onnx::NodeProto proto;
3332 proto.add_output(constParams.name);
3333 addLayer(constParams, proto);
3337 addLayer(layerParams, node_proto);
3340 // Domain: ai.onnx (default)
3341 // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
3342 void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
3344 CV_UNUSED(opset_version);
3345 DispatchMap dispatch;
3347 dispatch["ArgMax"] = dispatch["ArgMin"] = &ONNXImporter::parseArg;
3348 dispatch["MaxUnpool"] = &ONNXImporter::parseMaxUnpool;
3349 dispatch["MaxPool"] = &ONNXImporter::parseMaxPool;
3350 dispatch["AveragePool"] = &ONNXImporter::parseAveragePool;
3351 dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] =
3352 dispatch["ReduceMax"] = &ONNXImporter::parseReduce;
3353 dispatch["Slice"] = &ONNXImporter::parseSlice;
3354 dispatch["Split"] = &ONNXImporter::parseSplit;
3355 dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias;
3356 dispatch["Pow"] = &ONNXImporter::parsePow;
3357 dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseMinMax;
3358 dispatch["Neg"] = &ONNXImporter::parseNeg;
3359 dispatch["Constant"] = &ONNXImporter::parseConstant;
3360 dispatch["LSTM"] = &ONNXImporter::parseLSTM;
3361 dispatch["GRU"] = &ONNXImporter::parseGRU;
3362 dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler;
3363 dispatch["Clip"] = &ONNXImporter::parseClip;
3364 dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu;
3365 dispatch["Relu"] = &ONNXImporter::parseRelu;
3366 dispatch["Elu"] = &ONNXImporter::parseElu;
3367 dispatch["Tanh"] = &ONNXImporter::parseTanh;
3368 dispatch["Abs"] = &ONNXImporter::parseAbs;
3369 dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = &ONNXImporter::parseCompare;
3370 dispatch["PRelu"] = &ONNXImporter::parsePRelu;
3371 dispatch["LRN"] = &ONNXImporter::parseLRN;
3372 dispatch["InstanceNormalization"] = &ONNXImporter::parseInstanceNormalization;
3373 dispatch["BatchNormalization"] = &ONNXImporter::parseBatchNormalization;
3374 dispatch["Gemm"] = &ONNXImporter::parseGemm;
3375 dispatch["MatMul"] = &ONNXImporter::parseMatMul;
3376 dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseMul;
3377 dispatch["Conv"] = &ONNXImporter::parseConv;
3378 dispatch["ConvTranspose"] = &ONNXImporter::parseConvTranspose;
3379 dispatch["Transpose"] = &ONNXImporter::parseTranspose;
3380 dispatch["Squeeze"] = &ONNXImporter::parseSqueeze;
3381 dispatch["Flatten"] = &ONNXImporter::parseFlatten;
3382 dispatch["Unsqueeze"] = &ONNXImporter::parseUnsqueeze;
3383 dispatch["Expand"] = &ONNXImporter::parseExpand;
3384 dispatch["Reshape"] = &ONNXImporter::parseReshape;
3385 dispatch["Pad"] = &ONNXImporter::parsePad;
3386 dispatch["Shape"] = &ONNXImporter::parseShape;
3387 dispatch["Cast"] = &ONNXImporter::parseCast;
3388 dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill;
3389 dispatch["Gather"] = &ONNXImporter::parseGather;
3390 dispatch["Concat"] = &ONNXImporter::parseConcat;
3391 dispatch["Resize"] = &ONNXImporter::parseResize;
3392 dispatch["Upsample"] = &ONNXImporter::parseUpsample;
3393 dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax;
3394 dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
3395 dispatch["CumSum"] = &ONNXImporter::parseCumSum;
3397 std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
3398 "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
3399 "Identity", "Log", "Round", "Selu", "Sigmoid", "Sin", "Sinh", "Softmax",
3400 "Softplus", "Softsign", "Sqrt", "Tan", "ThresholdedRelu"};
3401 for (const auto& name : simpleLayers)
3403 dispatch[name] = &ONNXImporter::parseSimpleLayers;
3406 // ai.onnx: opset 10+
3407 dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant;
3408 dispatch["QLinearConv"] = &ONNXImporter::parseQConv;
3409 dispatch["QLinearMatMul"] = &ONNXImporter::parseQMatMul;
3411 domain_dispatch_map[str_domain_ai_onnx] = dispatch;
3414 // Domain: com.microsoft
3415 // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
3416 void ONNXImporter::buildDispatchMap_COM_MICROSOFT(int opset_version)
3418 CV_UNUSED(opset_version);
3419 DispatchMap dispatch;
3421 dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter::parseQEltwise;
3422 dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter::parseQAvgPool;
3423 dispatch["QLinearLeakyRelu"] = &ONNXImporter::parseQLeakyRelu;
3424 dispatch["QLinearSigmoid"] = &ONNXImporter::parseQSigmoid;
3425 dispatch["QLinearConcat"] = &ONNXImporter::parseQConcat;
3427 domain_dispatch_map["com.microsoft"] = dispatch;
3431 Net readNetFromONNX(const String& onnxFile)
3433 return detail::readNetDiagnostic<ONNXImporter>(onnxFile.c_str());
3436 Net readNetFromONNX(const char* buffer, size_t sizeBuffer)
3438 return detail::readNetDiagnostic<ONNXImporter>(buffer, sizeBuffer);
3441 Net readNetFromONNX(const std::vector<uchar>& buffer)
3443 return readNetFromONNX(reinterpret_cast<const char*>(buffer.data()), buffer.size());
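// Illustrative usage sketch (not part of the importer; the model path, 'image' and the preprocessing
// parameters below are placeholders):
//   cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");
//   cv::Mat blob = cv::dnn::blobFromImage(image, 1.0 / 255.0);  // 'image' is a hypothetical cv::Mat
//   net.setInput(blob);
//   cv::Mat out = net.forward();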
3446 Mat readTensorFromONNX(const String& path)
3448 std::fstream input(path.c_str(), std::ios::in | std::ios::binary);
3451 CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", path.c_str()));
3454 opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto();
3455 if (!tensor_proto.ParseFromIstream(&input))
3457 CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX data: %s", path.c_str()));
3459 Mat mat = getMatFromTensor(tensor_proto);
3460 releaseONNXTensor(tensor_proto);
3464 CV__DNN_INLINE_NS_END