1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 // Copyright (C) 2018, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
8 #include "../precomp.hpp"
9 #include <opencv2/dnn/shape_utils.hpp>
11 #include <opencv2/dnn/layer_reg.private.hpp>
13 #include <opencv2/core/utils/fp_control_utils.hpp>
15 #include <opencv2/core/utils/logger.defines.hpp>
16 #undef CV_LOG_STRIP_LEVEL
17 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
18 #include <opencv2/core/utils/logger.hpp>
20 #include <opencv2/core/utils/configuration.private.hpp>
31 #if defined _MSC_VER && _MSC_VER < 1910/*MSVS 2017*/
33 #pragma warning(disable: 4503) // decorated name length exceeded, name was truncated
36 #if defined(__GNUC__) && __GNUC__ >= 5
37 #pragma GCC diagnostic push
38 #pragma GCC diagnostic ignored "-Wsuggest-override"
40 #include "opencv-onnx.pb.h"
41 #if defined(__GNUC__) && __GNUC__ >= 5
42 #pragma GCC diagnostic pop
45 #include "onnx_graph_simplifier.hpp"
49 CV__DNN_INLINE_NS_BEGIN
51 extern bool DNN_DIAGNOSTICS_RUN;
53 class ONNXLayerHandler;
// Members of the ONNX importer, which turns a parsed ONNX model into layers of a cv::dnn::Net.
// NOTE(review): the class header, access specifiers and several short member lines are not
// visible in this excerpt; the comments below describe only the declarations that are shown.
57 FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
// Parsed model; both constructors fill it through ParseFromIstream().
59 opencv_onnx::ModelProto model_proto;
// LayerInfo pairs a layer id with one of that layer's output indices (both default to 0).
63 LayerInfo(int _layerId = 0, int _outputId = 0) : layerId(_layerId), outputId(_outputId) {}
// Converts every initializer of the graph to a Mat, keyed by tensor name.
66 std::map<std::string, Mat> getGraphTensors(
67 const opencv_onnx::GraphProto& graph_proto);
// Constant blob lookup, by node input index or by name; an unknown name raises StsBadArg.
68 Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
69 Mat getBlob(const std::string& input_name);
// Translates the attributes of a node into LayerParams entries.
71 LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);
// Registers a blob in constBlobs and its shape in outShapes.
73 void addConstant(const std::string& name, const Mat& blob);
// Adds a layer to the destination net, connects its known inputs and records output shapes.
74 void addLayer(LayerParams& layerParams,
75 const opencv_onnx::NodeProto& node_proto);
// Sets "depth", "scales" and "zeropoints" for nodes whose first output name ends with "quantized".
76 void handleQuantizedNode(LayerParams& layerParams,
77 const opencv_onnx::NodeProto& node_proto);
// Graph-rewriting helpers used by individual node parsers.
79 void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
80 const std::string& input, size_t n);
81 void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id);
82 void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size);
83 void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n);
84 std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n);
85 std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
86 int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
88 void lstm_add_transform(int num_directions, int batch_size, int hidden_size,
89 int index, const std::string& input_name, const std::string& output_name);
// Construct from a model file on disk or from an in-memory serialized model.
91 ONNXImporter(Net& net, const char *onnxFile);
92 ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer);
// Created only when DNN_DIAGNOSTICS_RUN is set (see the constructors); null otherwise.
97 std::unique_ptr<ONNXLayerHandler> layerHandler;
// Copy of model_proto.graph() made by populateNet().
100 opencv_onnx::GraphProto graph_proto;
// Producer name taken from the model, when present.
101 std::string framework_name;
// Constant tensors by name: graph initializers plus everything passed to addConstant().
103 std::map<std::string, Mat> constBlobs;
105 std::map<std::string, MatShape> outShapes; // List of internal blobs shapes.
106 bool hasDynamicShapes; // Whether the model has inputs with dynamic shapes
107 typedef std::map<std::string, MatShape>::iterator IterShape_t;
// Maps a tensor name to the layer and output index that produce it.
109 std::map<std::string, LayerInfo> layer_id;
110 typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;
111 typedef std::map<std::string, LayerInfo>::const_iterator ConstIterLayerId_t;
// Processes one graph node; called by populateNet() for every node in order.
113 void handleNode(const opencv_onnx::NodeProto& node_proto);
116 friend class ONNXLayerHandler;
// Parser dispatch: a DispatchMap is searched by node op_type (see ONNXLayerHandler::fillRegistry);
// DomainDispatchMap holds one DispatchMap per key — presumably the operator domain, confirm in getDispatchMap().
117 typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
118 typedef std::map<std::string, ONNXImporterNodeParser> DispatchMap;
119 typedef std::map<std::string, DispatchMap> DomainDispatchMap;
121 DomainDispatchMap domain_dispatch_map;
// Domain of a node; falls back to "ai.onnx" when the node declares none.
122 std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto);
123 const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto);
// Called from parseOperatorSet() with the opset version recorded for the respective domain.
124 void buildDispatchMap_ONNX_AI(int opset_version);
125 void buildDispatchMap_COM_MICROSOFT(int opset_version);
127 // Domain: 'ai.onnx' (default)
128 // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
129 void parseArg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
130 void parseMaxUnpool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
131 void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
132 void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
133 void parseGlobalPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
134 void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
135 void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
136 void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
137 void parseBias (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
138 void parsePow (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
139 void parseMinMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
140 void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
141 void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
142 void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
143 void parseGRU (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
144 void parseImageScaler (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
145 void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
146 void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
147 void parseRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
148 void parseElu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
149 void parseTanh (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
150 void parseAbs (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
151 void parseCompare (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
152 void parsePRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
153 void parseLRN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
154 void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
155 void parseBatchNormalization (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
156 void parseGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
157 void parseMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
158 void parseMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
159 void parseConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
160 void parseConvTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
161 void parseTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
162 void parseSqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
163 void parseFlatten (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
164 void parseUnsqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
165 void parseExpand (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
166 void parseReshape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
167 void parsePad (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
168 void parseShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
169 void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
170 void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
171 void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
172 void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
173 void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
174 void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
175 void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
176 void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
177 void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
178 void parseDepthToSpace (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
179 void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
181 // Domain: com.microsoft
182 // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
183 void parseQuantDequant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
184 void parseQConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
185 void parseQMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
186 void parseQEltwise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
187 void parseQLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
188 void parseQSigmoid (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
189 void parseQAvgPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
190 void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
192 // '???' domain or '???' layer type
193 void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
195 int onnx_opset; // OperatorSetIdProto for 'onnx' domain
196 std::map<std::string, int> onnx_opset_map; // map from OperatorSetIdProto
// Reads model_proto.opset_import and builds the per-domain dispatch maps.
197 void parseOperatorSet();
// Name used for the default ONNX operator domain.
199 const std::string str_domain_ai_onnx = "ai.onnx";
// Reads the OPENCV_DNN_ONNX_USE_LEGACY_NAMES configuration parameter (default: false).
203 bool getParamUseLegacyNames()
205 bool param = utils::getConfigurationParameterBool("OPENCV_DNN_ONNX_USE_LEGACY_NAMES", false);
208 std::string extractNodeName(const opencv_onnx::NodeProto& node_proto);
// Diagnostic helper that reports graph nodes whose operator type has no registered parser.
// NOTE(review): braces and access specifiers are on lines not shown in this excerpt.
212 class ONNXLayerHandler : public detail::LayerHandler
// Stores the importer pointer; explicit to avoid implicit conversion from ONNXImporter*.
215 explicit ONNXLayerHandler(ONNXImporter* importer_);
// Walks all nodes of `net` and calls addMissing() for each one without a dispatch entry.
217 void fillRegistry(const opencv_onnx::GraphProto& net);
// Importer whose getLayerTypeDomain()/getDispatchMap() are queried by fillRegistry(); not owned here as far as this excerpt shows.
220 ONNXImporter* importer;
223 ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){}
225 void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
227 int layersSize = net.node_size();
228 for (int li = 0; li < layersSize; li++) {
229 const opencv_onnx::NodeProto &node_proto = net.node(li);
230 const std::string& name = node_proto.output(0);
231 const std::string& type = node_proto.op_type();
232 const std::string& layer_type_domain = importer->getLayerTypeDomain(node_proto);
233 const auto& dispatch = importer->getDispatchMap(node_proto);
234 if (dispatch.find(type) == dispatch.end())
236 addMissing(name, cv::format("%s.%s", layer_type_domain.c_str(), type.c_str()));
// Builds an importer that reads a serialized ONNX model from the file `onnxFile`.
// NOTE(review): some member initializers and guard lines of this constructor are not shown in this excerpt.
242 ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
// The layer handler exists only for diagnostic runs; it receives `this` and merely stores it.
243 : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
246 , useLegacyNames(getParamUseLegacyNames())
248 hasDynamicShapes = false;
250 CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
// Opened in binary mode for ParseFromIstream() below.
252 std::fstream input(onnxFile, std::ios::in | std::ios::binary);
// Error for an unreadable file — the guarding condition is on a line not shown here.
255 CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
// Parse failure is reported as an unsupported format, naming the file.
258 if (!model_proto.ParseFromIstream(&input))
260 CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
// Builds an importer that parses a serialized ONNX model held in memory (`sizeBuffer` bytes at `buffer`).
// NOTE(review): some member initializers and brace lines of this constructor are not shown in this excerpt.
266 ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
// The layer handler exists only for diagnostic runs; it receives `this` and merely stores it.
267 : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
270 , useLegacyNames(getParamUseLegacyNames())
272 hasDynamicShapes = false;
273 CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
// Minimal stream buffer exposing the caller's memory as a get area, so it can be wrapped in std::istream.
275 struct _Buf : public std::streambuf
277 _Buf(const char* buffer, size_t sizeBuffer)
// setg() takes non-const char*, hence the const_cast; only the get area is set up here.
279 char* p = const_cast<char*>(buffer);
280 setg(p, p, p + sizeBuffer);
284 _Buf buf(buffer, sizeBuffer);
285 std::istream input(&buf);
// Parse failure is reported as an unsupported format.
287 if (!model_proto.ParseFromIstream(&input))
288 CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");
294 inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
296 if (layerParams.has(oldKey)) {
297 layerParams.set(newKey, layerParams.get(oldKey));
298 layerParams.erase(oldKey);
303 void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
305 CV_Assert(valueInfoProto.has_name());
306 CV_Assert(valueInfoProto.has_type());
307 const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
308 CV_Assert(typeProto.has_tensor_type());
309 const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
310 CV_Assert(tensor.has_shape());
311 const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
313 int dim_size = tensorShape.dim_size();
314 CV_CheckGE(dim_size, 0, "");
315 MatShape shape(dim_size);
316 for (int j = 0; j < dim_size; ++j)
318 const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
319 if (dimension.has_dim_param())
321 CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
323 // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
324 if (dimension.has_denotation())
326 CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
328 shape[j] = dimension.dim_value();
330 CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
334 void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
336 if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
338 int dim_size = tensorProto.dims_size();
339 CV_CheckGE(dim_size, 0, "");
340 MatShape shape(dim_size);
341 for (int j = 0; j < dim_size; ++j)
343 int sz = static_cast<int>(tensorProto.dims(j));
346 CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
349 void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
351 if (!tensor_proto.raw_data().empty()) {
352 delete tensor_proto.release_raw_data();
356 void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
357 std::vector<Mat>& outputs)
359 Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
360 CV_Assert((bool)layer);
362 std::vector<MatShape> inpShapes(inputs.size());
363 int ddepth = params.get<int>("depth", CV_32F);
364 for (size_t i = 0; i < inputs.size(); ++i)
366 inpShapes[i] = shape(inputs[i]);
367 if (i > 0 && ddepth != inputs[i].depth())
368 CV_Error(Error::StsNotImplemented, "Mixed input data types.");
369 ddepth = inputs[i].depth();
372 std::vector<MatShape> outShapes, internalShapes;
373 layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);
375 std::vector<Mat> internals(internalShapes.size());
376 outputs.resize(outShapes.size());
377 for (size_t i = 0; i < outShapes.size(); ++i)
378 outputs[i].create(outShapes[i], ddepth);
379 for (size_t i = 0; i < internalShapes.size(); ++i)
380 internals[i].create(internalShapes[i], ddepth);
382 layer->finalize(inputs, outputs);
383 layer->forward(inputs, outputs, internals);
386 std::map<std::string, Mat> ONNXImporter::getGraphTensors(
387 const opencv_onnx::GraphProto& graph_proto)
389 std::map<std::string, Mat> layers_weights;
391 for (int i = 0; i < graph_proto.initializer_size(); i++)
393 const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
394 dumpTensorProto(i, tensor_proto, "initializer");
395 Mat mat = getMatFromTensor(tensor_proto);
396 releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto)); // drop already loaded data
398 if (DNN_DIAGNOSTICS_RUN && mat.empty())
401 layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
403 return layers_weights;
406 static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
407 std::vector<int32_t> dst(src.size());
408 convertInt64ToInt32(src, dst, src.size());
409 return DictValue::arrayInt(&dst[0], src.size());
412 static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) {
413 return DictValue::arrayString(src.begin(), static_cast<int>(src.size()));
// Translates the attributes of an ONNX node into LayerParams entries.
// Well-known ONNX attribute names are mapped to different parameter names
// (kernel_shape -> kernel_size, strides -> stride, pads -> pad / paddings,
// auto_pad -> pad_mode, dilations -> dilation); other attributes keep their own
// name and are stored according to which value field they carry.
// NOTE(review): brace, try/else and return lines of this function are not visible in this
// excerpt; the comments describe only the statements shown.
416 LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
419 for(int i = 0; i < node_proto.attribute_size(); i++)
// NOTE(review): both the attribute and its name are copied on every iteration; const references would avoid that.
421 opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
422 std::string attribute_name = attribute_proto.name();
// Spatial attributes are accepted for 1 to 3 dimensions only.
426 if(attribute_name == "kernel_shape")
428 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
429 lp.set("kernel_size", parse(attribute_proto.ints()));
431 else if(attribute_name == "strides")
433 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
434 lp.set("stride", parse(attribute_proto.ints()));
436 else if(attribute_name == "pads")
438 if (node_proto.op_type() == "Pad")
441 // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
442 // We need to shuffle it to begin0, end0, begin1, end1, ...
443 CV_Assert(attribute_proto.ints_size() % 2 == 0);
444 const int dims = attribute_proto.ints_size() / 2;
445 std::vector<int32_t> paddings;
446 paddings.reserve(attribute_proto.ints_size());
// NOTE(review): this `i` shadows the attribute index of the enclosing loop.
447 for (int i = 0; i < dims; ++i)
// The int64 values are narrowed to int32_t here without a range check.
449 paddings.push_back(attribute_proto.ints(i));
450 paddings.push_back(attribute_proto.ints(dims + i));
// NOTE(review): an empty "pads" list passes the assert above, and &paddings[0] on an empty vector is undefined; paddings.data() would be safe.
452 lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
456 // Convolution or pooling.
457 CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
458 lp.set("pad", parse(attribute_proto.ints()));
461 else if(attribute_name == "auto_pad")
// Both SAME variants map to the single "SAME" mode; no other auto_pad value is handled in the lines shown.
463 if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
464 lp.set("pad_mode", "SAME");
466 else if (attribute_proto.s() == "VALID") {
467 lp.set("pad_mode", "VALID");
470 else if(attribute_name == "dilations")
472 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
473 lp.set("dilation", parse(attribute_proto.ints()));
// String lists are accepted only for the LSTM "activations" attribute; see the 'Strings' branch further down for all other cases.
475 else if(attribute_name == "activations" && node_proto.op_type() == "LSTM")
477 lp.set(attribute_name, parseStr(attribute_proto.strings()));
// Generic handling by value kind starts here; the first matching field wins.
479 else if (attribute_proto.has_i())
481 ::google::protobuf::int64 src = attribute_proto.i();
// Values outside the int32 range are rejected, so the saturate_cast below never actually clamps.
482 if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
483 CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
485 lp.set(attribute_name, saturate_cast<int32_t>(src));
487 else if (attribute_proto.has_f())
489 lp.set(attribute_name, attribute_proto.f());
491 else if (attribute_proto.has_s())
493 lp.set(attribute_name, attribute_proto.s());
495 else if (attribute_proto.floats_size() > 0)
497 lp.set(attribute_name, DictValue::arrayReal(
498 attribute_proto.floats().data(), attribute_proto.floats_size()));
500 else if (attribute_proto.ints_size() > 0)
502 lp.set(attribute_name, parse(attribute_proto.ints()));
// A tensor attribute is not stored by name: it is appended to lp.blobs.
504 else if (attribute_proto.has_t())
// NOTE(review): the TensorProto is copied before conversion.
506 opencv_onnx::TensorProto tensor = attribute_proto.t();
507 Mat blob = getMatFromTensor(tensor);
508 lp.blobs.push_back(blob);
// The remaining kinds are unsupported and raise StsNotImplemented.
510 else if (attribute_proto.has_g())
512 CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
514 else if (attribute_proto.graphs_size() > 0)
516 CV_Error(Error::StsNotImplemented,
517 cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
518 attribute_name.c_str(), attribute_proto.graphs_size())
521 else if (attribute_proto.strings_size() > 0)
// The offending strings are logged one by one before the error is raised.
523 std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
524 attribute_name.c_str(), attribute_proto.strings_size());
525 CV_LOG_ERROR(NULL, msg);
526 for (int i = 0; i < attribute_proto.strings_size(); i++)
528 CV_LOG_ERROR(NULL, " Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
530 CV_Error(Error::StsNotImplemented, msg);
532 else if (attribute_proto.tensors_size() > 0)
534 CV_Error(Error::StsNotImplemented,
535 cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
536 attribute_name.c_str(), attribute_proto.tensors_size())
541 CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
// Errors raised above are caught per attribute; in a diagnostic run the problem is logged.
// NOTE(review): what happens after the log, and outside diagnostic runs, is on lines not shown here.
544 catch (const cv::Exception& e)
547 if (DNN_DIAGNOSTICS_RUN)
549 CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << attribute_name.c_str()
559 Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index)
561 CV_Assert(index < node_proto.input_size());
562 const std::string& input_name = node_proto.input(index);
563 return getBlob(input_name);
566 Mat ONNXImporter::getBlob(const std::string& input_name)
568 std::map<std::string, Mat>::const_iterator constBlob = constBlobs.find(input_name);
569 if (constBlob == constBlobs.end())
571 CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs");
573 return constBlob->second;
// Adds the layer described by `layerParams` to the destination net, registers the node's
// outputs, connects the inputs that are produced by known layers, and records the shapes
// of the outputs.
// NOTE(review): brace lines and the lines that declare/advance `inpNum` are not visible in this excerpt.
576 void ONNXImporter::addLayer(LayerParams& layerParams,
577 const opencv_onnx::NodeProto& node_proto)
// The layer's data depth comes from the "depth" parameter, CV_32F when unset.
579 int depth = layerParams.get<int>("depth", CV_32F);
580 int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
// Register each named output as (layer id, output index); unnamed outputs are skipped.
581 for (int i = 0; i < node_proto.output_size(); ++i)
583 const std::string& output_name = node_proto.output(i);
584 if (!output_name.empty())
// std::map::insert keeps an existing entry, so a name that is already registered is not overwritten.
586 layer_id.insert(std::make_pair(output_name, LayerInfo(id, i)));
590 std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
// Only inputs found in layer_id are connected; other inputs (e.g. constants) are skipped.
592 for (int j = 0; j < node_proto.input_size(); j++)
594 const std::string& input_name = node_proto.input(j);
595 IterLayerId_t layerId = layer_id.find(input_name);
596 if (layerId != layer_id.end()) {
// `inpNum` is the input slot on the new layer, so it can differ from `j` when inputs are skipped.
597 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
599 // Collect input shapes.
600 IterShape_t shapeIt = outShapes.find(input_name);
// Every connected input must already have a recorded shape.
601 CV_Assert(shapeIt != outShapes.end());
602 layerInpShapes.push_back(shapeIt->second);
605 // Compute shape of output blob for this layer.
606 Ptr<Layer> layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage
607 layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
// The layer may report fewer shapes than the node has outputs; only the common prefix is stored.
608 for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
610 const std::string& output_name = node_proto.output(i);
611 if (!output_name.empty())
613 outShapes[node_proto.output(i)] = layerOutShapes[i];
618 /** @brief Make N copies of input layer and set them as input to node_proto.
619 * @param prefix prefix of new layers' names
620 * @param node_proto node which will contain all copies as inputs
621 * @param input name of the node to copy
622 * @param n number of copies
// NOTE(review): the declaration of `copyLP` and the brace lines are not visible in this excerpt.
624 void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
625 const std::string& input, size_t n)
627 std::vector<std::string> input_names;
628 input_names.reserve(n);
629 for (size_t j = 0; j < n; j++)
// Each copy is an "Identity" layer named "<prefix>/copy_<j>".
632 copyLP.name = format("%s/copy_%zu", prefix.c_str(), j);
633 copyLP.type = "Identity";
// The generated name must not collide with an output that is already registered.
634 CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) &&
635 "Couldn't copy the node: generated name already exists in the graph.");
636 input_names.push_back(copyLP.name);
// node_proto is temporarily rewired (input 0 -> `input`, output 0 -> copy name) so that addLayer()
// connects and registers the copy; node_proto therefore needs at least one input and one output on entry.
638 node_proto.set_input(0, input);
639 node_proto.set_output(0, copyLP.name);
640 addLayer(copyLP, node_proto);
// Finally the node's inputs are replaced by the n copies; its output 0 still carries the last copy's name at this point.
642 node_proto.clear_input();
643 for (size_t i = 0; i < input_names.size(); i++)
645 node_proto.add_input(input_names[i]);
649 /** @brief Multiply one of node_proto inputs by -1
650 * @param layerParams parameters of the node
651 * @param node_proto node which input will be replaced
652 * @param input_id id of input to be multiplied by -1
654 void ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id)
656 LayerParams powerParams;
657 powerParams.name = layerParams.name + "/neg";
658 powerParams.type = "Power";
659 powerParams.set("scale", -1.f);
662 int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
664 IterLayerId_t layerId = layer_id.find(node_proto.input(input_id));
665 CV_Assert(layerId != layer_id.end());
666 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
668 layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
669 outShapes[powerParams.name] = outShapes[node_proto.input(input_id)];
671 //Replace input to Power
672 node_proto.set_input(input_id, powerParams.name);
675 void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
677 CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob));
678 constBlobs.insert(std::make_pair(name, blob));
679 outShapes.insert(std::make_pair(name, shape(blob)));
// Reads the opset imports of the model, records one version per operator domain in
// onnx_opset_map, and builds the dispatch maps for the domains that have one.
// NOTE(review): several guard/brace/else lines of this function are not visible in this excerpt.
682 void ONNXImporter::parseOperatorSet()
// -1 stands for "no ir_version in the model".
684 int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
688 int opset_size = model_proto.opset_import_size();
691 CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information")
695 for (int i = 0; i < opset_size; ++i)
697 const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
// A missing domain is treated as the empty string, a missing version as -1.
698 const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
// NOTE(review): version() is assigned to an int here without an explicit cast — confirm the field's width.
699 int version = opset_entry.has_version() ? opset_entry.version() : -1;
700 if (domain.empty() || domain == str_domain_ai_onnx)
702 // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
// When several entries name the default domain, the highest version wins.
703 onnx_opset = std::max(onnx_opset, version);
704 onnx_opset_map[str_domain_ai_onnx] = onnx_opset;
708 CV_LOG_DEBUG(NULL, "DNN/ONNX: using non-standard ONNX opset[" << i << "]: domain='" << domain << "' version=" << version);
// NOTE(review): this stores the default-domain opset seen so far, not this entry's `version`;
// the stored value is later passed to buildDispatchMap_COM_MICROSOFT() and printed as "version="
// for unknown domains — confirm whether `version` was intended.
709 onnx_opset_map[domain] = onnx_opset;
713 CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
// The default domain is always built; other domains only when they appear in the map.
715 buildDispatchMap_ONNX_AI(onnx_opset);
716 for (const auto& pair : onnx_opset_map)
718 if (pair.first == str_domain_ai_onnx)
720 continue; // done above
722 else if (pair.first == "com.microsoft")
724 buildDispatchMap_COM_MICROSOFT(pair.second);
// Any other domain gets no dispatch map; its nodes rely on custom layers.
728 CV_LOG_INFO(NULL, "DNN/ONNX: unknown domain='" << pair.first << "' version=" << pair.second << ". No dispatch map, you may need to register 'custom' layers.");
// For a node whose first output name ends with "quantized", looks up the matching scale and
// zero-point constants and stores them in `layerParams` ("depth", "scales", "zeropoints").
// NOTE(review): guard, brace and else lines of this function are not visible in this excerpt.
733 void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
734 const opencv_onnx::NodeProto& node_proto)
736 // Quantized nodes have output names ending with 'quantized'
737 std::string outName = node_proto.output(0);
738 int len = outName.length();
// 9 is the length of the suffix "quantized".
742 if (outName.substr(len - 9) == "quantized")
// Strip the suffix; constants are then looked up as "<stem>scale" and "<stem>zero_point".
744 outName = outName.substr(0, len - 9);
745 Mat scale, zeropoint;
// Preferred source: constants named after this node's own output.
747 if (constBlobs.find(outName + "scale") != constBlobs.end() &&
748 constBlobs.find(outName + "zero_point") != constBlobs.end())
750 scale = getBlob(outName + "scale");
751 zeropoint = getBlob(outName + "zero_point");
// Otherwise the parameters of the first input are used; getBlob() raises StsBadArg if they are missing.
// NOTE(review): the last 9 characters of the input name are dropped without checking that they spell "quantized".
755 std::string inpName = node_proto.input(0);
756 inpName = inpName.substr(0, inpName.length() - 9);
757 scale = getBlob(inpName + "scale");
758 zeropoint = getBlob(inpName + "zero_point");
// ...and registered under every output's stem, presumably so that consumers of these outputs can find them — confirm.
760 for (int i = 0; i < node_proto.output_size(); i++)
762 std::string out = node_proto.output(i);
763 out = out.substr(0, out.length() - 9);
764 addConstant(out + "scale", scale);
765 addConstant(out + "zero_point", zeropoint);
// Only a single (per-tensor) scale and zero point are supported.
769 if (scale.total() != 1 || zeropoint.total() != 1)
770 CV_Error(Error::StsNotImplemented, "Per-channel scales/zeropoints are not supported");
772 layerParams.set("depth", CV_8S);
// The blobs are read as float (scale) and int8_t (zero point); this assumes they are stored with those element types.
773 layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
774 layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
// Main import routine: copies and simplifies the graph, loads initializers, registers the
// network inputs and their shapes, processes every node in graph order and finally
// registers the network outputs.
// NOTE(review): several brace/guard lines of this function are not visible in this excerpt.
778 void ONNXImporter::populateNet()
780 CV_Assert(model_proto.has_graph());
// Work on a copy of the graph; simplifySubgraphs() below is applied to this copy.
781 graph_proto = model_proto.graph();
783 std::string framework_version;
784 if (model_proto.has_producer_name())
785 framework_name = model_proto.producer_name();
786 if (model_proto.has_producer_version())
787 framework_version = model_proto.producer_version();
789 CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
790 << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String())
791 << " model produced by '" << framework_name << "'"
792 << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
793 << ". Number of nodes = " << graph_proto.node_size()
794 << ", initializers = " << graph_proto.initializer_size()
795 << ", inputs = " << graph_proto.input_size()
796 << ", outputs = " << graph_proto.output_size()
801 simplifySubgraphs(graph_proto);
// The node count is taken after simplification and drives the node loop further down.
803 const int layersSize = graph_proto.node_size();
804 CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");
806 constBlobs = getGraphTensors(graph_proto); // scan GraphProto.initializer
807 std::vector<String> netInputs; // map with network inputs (without const blobs)
808 // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes.
809 for (int i = 0; i < graph_proto.input_size(); ++i)
811 const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
// Every graph input must carry a name, a tensor type and a shape.
812 CV_Assert(valueInfoProto.has_name());
813 const std::string& name = valueInfoProto.name();
814 CV_Assert(valueInfoProto.has_type());
815 const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
816 CV_Assert(typeProto.has_tensor_type());
817 const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
818 CV_Assert(tensor.has_shape());
819 const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
821 int dim_size = tensorShape.dim_size();
822 CV_CheckGE(dim_size, 0, ""); // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
823 MatShape inpShape(dim_size);
824 for (int j = 0; j < dim_size; ++j)
826 const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
827 if (dimension.has_dim_param())
829 CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
831 // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
832 if (dimension.has_denotation())
834 CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
// dim_value() is stored as is, even for a symbolic dimension.
836 inpShape[j] = dimension.dim_value();
837 // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
838 if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
// The flag is sticky: once set by any input it stays set for the rest of the import.
840 hasDynamicShapes = true;
// An input that also appears among the initializers is a pre-initialized constant.
843 bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
844 CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
845 CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
846 if (dim_size > 0 && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs
848 inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
850 outShapes[valueInfoProto.name()] = inpShape;
851 // fill map: push layer name, layer id and output id
// Network inputs are registered as outputs of layer 0, numbered in the order they are added.
// NOTE(review): presumably guarded by a check on `isInitialized` on a line not shown here — confirm.
854 netInputs.push_back(name);
855 layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
859 dstNet.setInputsNames(netInputs);
// Log the declared graph outputs.
862 for (int i = 0; i < graph_proto.output_size(); ++i)
864 dumpValueInfoProto(i, graph_proto.output(i), "output");
// In a diagnostic run, collect the nodes without a parser up front.
867 if (DNN_DIAGNOSTICS_RUN) {
868 CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
869 layerHandler->fillRegistry(graph_proto);
// Nodes are handled in graph order.
872 for(int li = 0; li < layersSize; li++)
874 const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
875 handleNode(node_proto);
// Register network outputs; an output without a name or without a producing layer is logged as an error.
879 for (int i = 0; i < graph_proto.output_size(); ++i)
881 const std::string& output_name = graph_proto.output(i).name();
882 if (output_name.empty())
884 CV_LOG_ERROR(NULL, "DNN/ONNX: can't register output without name: " << i);
887 ConstIterLayerId_t layerIt = layer_id.find(output_name);
888 if (layerIt == layer_id.end())
890 CV_LOG_ERROR(NULL, "DNN/ONNX: can't find layer for output name: '" << output_name << "'. Does model imported properly?");
894 const LayerInfo& li = layerIt->second;
895 int outputId = dstNet.registerOutput(output_name, li.layerId, li.outputId); CV_UNUSED(outputId);
896 // no need to duplicate message from engine: CV_LOG_DEBUG(NULL, "DNN/ONNX: registered output='" << output_name << "' with id=" << outputId);
899 CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!"));
902 std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto)
904 if (!node_proto.has_domain())
905 return str_domain_ai_onnx;
906 const std::string& domain = node_proto.domain();
908 return str_domain_ai_onnx;
912 const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto)
914 static DispatchMap empty_map;
915 const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
916 auto it = domain_dispatch_map.find(layer_type_domain);
917 if (it == domain_dispatch_map.end())
925 std::string ONNXImporter::extractNodeName(const opencv_onnx::NodeProto& node_proto)
927 // We need to rework DNN outputs API, this is a workaround for #21698
928 if (node_proto.has_name() && !node_proto.name().empty())
931 return node_proto.name();
932 return cv::format("onnx_node!%s", node_proto.name().c_str());
934 for (int i = 0; i < node_proto.output_size(); ++i)
936 const std::string& name = node_proto.output(i);
937 // There are two ways to leave an optional input or output unspecified:
938 // the first, available only for trailing inputs and outputs, is to simply not provide that input;
939 // the second method is to use an empty string in place of an input or output name.
944 return cv::format("onnx_node_output_%d!%s", i, name.c_str());
947 CV_Error(Error::StsAssert, "Couldn't deduce Node name.");
950 void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
952 CV_Assert(node_proto.output_size() >= 1);
953 const std::string& name = extractNodeName(node_proto);
954 const std::string& layer_type = node_proto.op_type();
955 const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
956 const auto& dispatch = getDispatchMap(node_proto);
958 CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and "
959 << node_proto.output_size() << " outputs: "
960 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
961 << cv::format(" from %sdomain='", onnx_opset_map.count(layer_type_domain) == 1 ? "" : "undeclared ")
962 << layer_type_domain << "'"
965 if (dispatch.empty())
967 CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'");
970 LayerParams layerParams;
973 // FIXIT not all cases can be repacked into "LayerParams". Importer should handle such cases directly for each "layer_type"
974 layerParams = getLayerParams(node_proto);
976 layerParams.name = name;
977 layerParams.type = layer_type;
978 layerParams.set("has_dynamic_shapes", hasDynamicShapes);
980 handleQuantizedNode(layerParams, node_proto);
982 DispatchMap::const_iterator iter = dispatch.find(layer_type);
983 if (iter != dispatch.end())
985 CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto);
989 parseCustomLayer(layerParams, node_proto);
992 catch (const cv::Exception& e)
994 if (DNN_DIAGNOSTICS_RUN)
996 CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
997 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
998 << " from domain='" << layer_type_domain << "'"
1001 cv::AutoLock lock(getLayerFactoryMutex());
1002 auto registeredLayers = getLayerFactoryImpl();
1003 if (registeredLayers.find(layerParams.type) != registeredLayers.end())
1007 Ptr<Layer> layer = LayerFactory::createLayerInstance(layerParams.type, layerParams);
1009 catch (const std::exception& e)
1011 CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". Error: " << e.what()
1018 CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
1019 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
1020 << " from domain='" << layer_type_domain << "'"
1023 for (int i = 0; i < node_proto.input_size(); i++)
1025 CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'");
1027 for (int i = 0; i < node_proto.output_size(); i++)
1029 CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'");
1031 if (DNN_DIAGNOSTICS_RUN)
1033 for (int i = 0; i < node_proto.output_size(); ++i)
1035 layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i)));
1036 outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)];
1040 CV_Error(Error::StsError, cv::format("Node [%s@%s]:(%s) parse error: %s", layer_type.c_str(), layer_type_domain.c_str(), name.c_str(), e.what()));
1044 void ONNXImporter::parseArg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1046 const std::string& layer_type = node_proto.op_type();
1047 layerParams.type = "Arg";
1048 layerParams.set("op", layer_type == "ArgMax" ? "max" : "min");
1049 addLayer(layerParams, node_proto);
1052 void setCeilMode(LayerParams& layerParams)
1054 // auto_pad attribute is deprecated and uses ceil
1055 if (layerParams.has("pad_mode"))
1057 layerParams.set("ceil_mode", true);
1059 else if (!layerParams.has("ceil_mode"))
1061 layerParams.set("ceil_mode", false);
1065 void ONNXImporter::parseMaxUnpool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1067 layerParams.type = "MaxUnpool";
1069 DictValue kernel_shape = layerParams.get("kernel_size");
1070 CV_Assert(kernel_shape.size() == 2);
1071 layerParams.set("pool_k_w", kernel_shape.get<int>(0));
1072 layerParams.set("pool_k_h", kernel_shape.get<int>(1));
1074 int pool_pad_w = 0, pool_pad_h = 0;
1075 if (layerParams.has("pad"))
1077 DictValue pads = layerParams.get("pad");
1078 CV_CheckEQ(pads.size(), 2, "");
1079 pool_pad_w = pads.get<int>(0);
1080 pool_pad_h = pads.get<int>(1);
1082 layerParams.set("pool_pad_w", pool_pad_w);
1083 layerParams.set("pool_pad_h", pool_pad_h);
1086 int pool_stride_w = 1, pool_stride_h = 1;
1087 if (layerParams.has("stride"))
1089 DictValue strides = layerParams.get("stride");
1090 CV_CheckEQ(strides.size(), 2, "");
1091 pool_stride_w = strides.get<int>(0);
1092 pool_stride_h = strides.get<int>(1);
1094 layerParams.set("pool_stride_w", pool_stride_w);
1095 layerParams.set("pool_stride_h", pool_stride_h);
1097 addLayer(layerParams, node_proto);
1100 void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1102 int depth = layerParams.get<int>("depth", CV_32F);
1103 layerParams.type = (depth == CV_8S) ? "PoolingInt8" : "Pooling";
1104 layerParams.set("pool", "MAX");
1105 setCeilMode(layerParams);
1106 addLayer(layerParams, node_proto);
1109 void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1111 layerParams.type = "Pooling";
1112 layerParams.set("pool", "AVE");
1113 setCeilMode(layerParams);
1114 layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
1115 addLayer(layerParams, node_proto);
1118 void ONNXImporter::parseGlobalPool(LayerParams &layerParams, const opencv_onnx::NodeProto &node_proto_)
1120 opencv_onnx::NodeProto node_proto = node_proto_;
1121 const std::string& layer_type = node_proto.op_type();
1122 const std::string output_name = node_proto.output(0);
1124 CV_Assert(node_proto.input_size() == 1);
1125 layerParams.type = "Pooling";
1127 if (layer_type == "GlobalMaxPool")
1129 else if (layer_type == "GlobalAveragePool")
1132 CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation.");
1134 CV_Assert(!layerParams.has("axes"));
1135 layerParams.set("global_pooling", true);
1136 layerParams.set("pool", pool);
1137 addLayer(layerParams, node_proto);
1140 void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1142 opencv_onnx::NodeProto node_proto = node_proto_;
1143 const std::string& layer_type = node_proto.op_type();
1144 const std::string output_name = node_proto.output(0);
1145 int depth = layerParams.get<int>("depth", CV_32F);
1147 CV_Assert(node_proto.input_size() <= 2);
1150 if (layer_type == "ReduceMax")
1152 else if (layer_type == "ReduceMin")
1154 else if (layer_type == "ReduceSum")
1156 else if (layer_type == "ReduceSumSquare")
1157 reduceType = "SUM_SQUARE";
1158 else if (layer_type == "ReduceProd")
1159 reduceType = "PROD";
1160 else if (layer_type == "ReduceL1")
1162 else if (layer_type == "ReduceL2")
1164 else if (layer_type == "ReduceLogSum")
1165 reduceType = "LOG_SUM";
1166 else if (layer_type == "ReduceLogSumExp")
1167 reduceType = "LOG_SUM_EXP";
1168 else if (layer_type == "ReduceMean")
1171 CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation.");
1173 // The ReduceInt8 can only support "MAX" and "MIN".
1176 CV_Assert(reduceType == "MAX" || reduceType == "MIN");
1179 layerParams.type = (depth == CV_8S) ? "ReduceInt8" : "Reduce";
1180 layerParams.set("reduce", reduceType);
1181 bool keepdims = layerParams.get<int>("keepdims", 1) == 1;
1183 MatShape inpShape = outShapes[node_proto.input(0)];
1184 std::vector<bool> shouldDelete(inpShape.size(), false);
1186 if (layer_type == "ReduceSum" && node_proto.input_size() == 2)
1188 if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
1190 Mat axesMat = getBlob(node_proto, 1);
1191 int axesNum = axesMat.total();
1192 for (int i = 0; i < axesNum; i++)
1194 int axis = normalize_axis(static_cast<int>(axesMat.at<float>(i)), inpShape.size());
1195 shouldDelete[axis] = true;
1199 // in opset 13, the ReduceSum has two input, it takes axes as input instead of attribute
1200 // details:https://github.com/onnx/onnx/issues/3420#issuecomment-844295687
1201 CV_Error(Error::StsNotImplemented, "Non-constant axis values in ReduceSum are not supported.");
1205 if (layerParams.has("axes"))
1207 DictValue axes = layerParams.get("axes");
1208 for (int i = 0; i < axes.size(); i++)
1210 int axis = normalize_axis(axes.get<int>(i), inpShape.size());
1211 shouldDelete[axis] = true;
1216 for (int i = 0; i < inpShape.size(); i++)
1218 shouldDelete[i] = true;
1223 MatShape targetShape;
1224 for (int i = 0; i < inpShape.size(); ++i)
1226 if (!shouldDelete[i])
1228 targetShape.push_back(inpShape[i]);
1232 targetShape.push_back(1);
1236 if (targetShape.empty())
1237 targetShape.push_back(1);
1239 // Using PermuteLayer to move the deleted axis to the last.
1240 std::vector<int> perm(inpShape.size(), 0);
1241 for (int i = 0; i < inpShape.size(); i++)
1244 bool needPermuet = false;
1245 for (int i = 0; i < inpShape.size(); i++)
1247 if (shouldDelete[i])
1249 // find the first not deleted element.
1250 std::vector<bool>::iterator iter = std::find(shouldDelete.begin() + i, shouldDelete.end(), false);
1252 if (iter != shouldDelete.end())
1254 int index = iter - shouldDelete.begin();
1256 bool temp = shouldDelete[index];
1257 shouldDelete[index] = shouldDelete[i];
1258 shouldDelete[i] = temp;
1260 std::swap(perm[index], perm[i]);
1261 std::swap(inpShape[index], inpShape[i]);
1269 auto inputString= node_proto.input(0);
1272 LayerParams permuteLp;
1273 permuteLp.name = layerParams.name + "/permute";
1274 permuteLp.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
1275 permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size()));
1277 opencv_onnx::NodeProto protoPermute;
1278 protoPermute.add_input(inputString);
1279 protoPermute.add_output(permuteLp.name);
1280 addLayer(permuteLp, protoPermute);
1281 inputString = permuteLp.name;
1284 std::vector<int> deletedDims;
1285 for (int axis_i = 0; axis_i < inpShape.size(); ++axis_i)
1287 if (shouldDelete[axis_i])
1289 deletedDims.push_back(inpShape[axis_i]);
1293 LayerParams reduceLp = layerParams;
1294 reduceLp.name = layerParams.name + "/reduce";
1295 CV_Assert(layer_id.find(reduceLp.name) == layer_id.end());
1296 reduceLp.set("deleted_dims", DictValue::arrayInt(&deletedDims[0], deletedDims.size()));
1298 node_proto.set_input(0, inputString);
1299 node_proto.set_output(0, reduceLp.name);
1300 addLayer(reduceLp, node_proto);
1302 layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape";
1303 layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));
1305 // Set batchsize dim as dynamic to be compatible with batch size >= 2.
1306 if (targetShape.size() > 1)
1308 std::vector<int> dynamicAxes = {0}; // The index of batchsize dim is 0.
1309 std::vector<int> inputIndices = {0};
1311 layerParams.set("has_dynamic_shapes", true);
1312 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
1313 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
1316 node_proto.set_input(0, node_proto.output(0));
1317 node_proto.set_output(0, output_name);
1319 addLayer(layerParams, node_proto);
1322 void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1325 std::vector<int> begin;
1326 std::vector<int> end;
1327 std::vector<int> steps;
1328 int inp_size = node_proto.input_size();
1332 if (layerParams.has("axes")) {
1333 DictValue axes = layerParams.get("axes");
1334 for (int i = 1; i < axes.size(); ++i) {
1335 CV_Assert(axes.get<int>(i - 1) == axes.get<int>(i) - 1);
1337 axis = axes.get<int>(0);
1340 DictValue starts = layerParams.get("starts");
1341 DictValue ends = layerParams.get("ends");
1342 CV_Assert(starts.size() == ends.size());
1345 CV_CheckLE(axis, 1024, "Slice layer can't have more than 1024 axes"); // arbitrary limit
1346 begin.resize(axis, 0);
1347 end.resize(axis, INT_MAX);
1349 for (int i = 0; i < starts.size(); ++i)
1351 begin.push_back(starts.get<int>(i));
1352 end.push_back(ends.get<int>(i));
1354 } else { // inp_size > 1
1355 CV_Assert(inp_size >= 3);
1356 for (int i = 1; i < inp_size; i++) {
1357 CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
1359 Mat start_blob = getBlob(node_proto, 1);
1360 Mat end_blob = getBlob(node_proto, 2);
1361 CV_Assert(start_blob.total() == end_blob.total());
1364 Mat axes_blob = getBlob(node_proto, 3);
1365 const int* axes = (int*)axes_blob.data;
1366 for (int i = 1; i < axes_blob.total(); ++i) {
1367 CV_Assert(axes[i - 1] == axes[i] - 1);
1372 const int* starts = start_blob.ptr<int>();
1373 const int* ends = end_blob.ptr<int>();
1375 begin.resize(axis, 0);
1376 end.resize(axis, INT_MAX);
1378 std::copy(starts, starts + start_blob.total(), std::back_inserter(begin));
1379 std::copy(ends, ends + end_blob.total(), std::back_inserter(end));
1381 if (inp_size == 5) {
1382 CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end());
1383 Mat step_blob = getBlob(node_proto, 4);
1384 const int* steps_ptr = step_blob.ptr<int>();
1387 steps.resize(axis, 1);
1389 std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps));
1391 // Very strange application for Slice op with tensor reversing.
1392 // We just workaround it for 2d constants.
1393 if (constBlobs.find(node_proto.input(0)) != constBlobs.end() &&
1395 start_blob.at<int>(0) == -1 && step_blob.at<int>(0) == -1 &&
1396 end_blob.at<int>(0) == std::numeric_limits<int32_t>::min())
1398 Mat inp = getBlob(node_proto, 0);
1402 flip(inp, flipped, 0);
1403 addConstant(node_proto.output(0), flipped);
1409 layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size()));
1410 layerParams.set("end", DictValue::arrayInt(&end[0], end.size()));
1411 layerParams.set("axis", axis);
1414 layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size()));
1416 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
1418 Mat inp = getBlob(node_proto, 0);
1419 std::vector<Mat> inputs, sliced;
1420 inputs.push_back(inp);
1421 runLayer(layerParams, inputs, sliced);
1422 CV_Assert(sliced.size() == 1);
1423 addConstant(node_proto.output(0), sliced[0]);
1426 addLayer(layerParams, node_proto);
1429 void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1431 if (layerParams.has("split"))
1433 DictValue splits = layerParams.get("split");
1434 const int numSplits = splits.size();
1435 CV_Assert(numSplits > 1);
1437 std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
1438 for (int i = 1; i < splits.size() - 1; ++i)
1440 slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i);
1442 layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
1446 layerParams.set("num_split", node_proto.output_size());
1448 int depth = layerParams.get<int>("depth", CV_32F);
1449 layerParams.type = (depth == CV_8S) ? "SliceInt8" : "Slice";
1450 layerParams.set("axis", layerParams.get<float>("axis", 0));
1451 addLayer(layerParams, node_proto);
1454 void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1456 opencv_onnx::NodeProto node_proto = node_proto_;
1457 const std::string& layer_type = node_proto.op_type();
1458 bool isSub = layer_type == "Sub";
1460 if (layer_type == "Sum" && node_proto.input_size() == 1)
1462 layerParams.type = "Identity";
1463 addLayer(layerParams, node_proto);
1467 CV_Assert((node_proto.input_size() == 2) || (layer_type == "Sum" && node_proto.input_size() > 2));
1469 if (layer_type == "Sum" && node_proto.input_size() > 2)
1471 for (int i = 0; i < node_proto.input_size(); ++i)
1473 if (layer_id.find(node_proto.input(i)) == layer_id.end())
1475 CV_Error(Error::StsNotImplemented, "Sum of constants is not implemented for inputs > 2");
1480 bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
1481 bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
1482 if (is_const_0 && is_const_1)
1484 Mat blob_0 = getBlob(node_proto, 0);
1485 Mat blob_1 = getBlob(node_proto, 1);
1486 CV_Assert(blob_0.size == blob_1.size);
1487 Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
1488 addConstant(node_proto.output(0), output);
1491 else if (is_const_0 || is_const_1)
1493 int const_blob_id = is_const_0 ? 0 : 1;
1494 int input_id = 1 - const_blob_id;
1495 Mat blob = getBlob(node_proto, const_blob_id);
1496 int blob_total = blob.total();
1498 const float inputScale = isSub && is_const_0 ? -1.f : 1.f;
1499 const float constScale = isSub && is_const_1 ? -1.f : 1.f;
1501 if (blob_total == 1) {
1502 layerParams.type = "Power";
1503 layerParams.set("scale", inputScale);
1504 layerParams.set("shift", constScale * blob.ptr<float>()[0]);
1507 MatShape inpShape = outShapes[node_proto.input(input_id)];
1508 if (shape(blob) == inpShape)
1510 LayerParams constParams;
1511 constParams.name = layerParams.name + "/const";
1512 constParams.type = "Const";
1513 constParams.blobs.push_back(blob);
1514 int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
1515 layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
1516 outShapes[constParams.name] = shape(blob);
1518 layerParams.type = "Eltwise";
1519 float coeffs[] = {1., isSub ? -1.f : 1.f};
1520 layerParams.set("coeff", DictValue::arrayReal<float*>(coeffs, 2));
1521 node_proto.set_input(const_blob_id, constParams.name);
1525 if (inputScale < 0.f)
1527 addNegation(layerParams, node_proto, input_id);
1530 layerParams.type = "Scale";
1531 layerParams.set("bias_term", true);
1533 for (int i = 0; i < graph_proto.initializer_size(); i++)
1535 opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
1536 if (tensor_proto.name() == node_proto.input(const_blob_id))
1538 axis = inpShape.size() - tensor_proto.dims_size();
1542 layerParams.set("axis", axis);
1543 blob = blob.reshape(1, 1);
1544 layerParams.blobs.push_back(constScale * blob);
1548 else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
1550 layerParams.type = "Eltwise";
1553 static float subCoeffs[] = {1.f, -1.f};
1554 layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
1561 addNegation(layerParams, node_proto, 1);
1563 layerParams.type = "Scale";
1564 layerParams.set("bias_term", true);
1566 addLayer(layerParams, node_proto);
1569 void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1571 if (layer_id.find(node_proto.input(1)) != layer_id.end())
1572 CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");
1574 Mat blob = getBlob(node_proto, 1);
1575 if (blob.total() != 1)
1576 CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");
1578 blob.convertTo(blob, CV_32F);
1579 layerParams.type = "Power";
1580 layerParams.set("power", blob.ptr<float>()[0]);
1581 addLayer(layerParams, node_proto);
1585 void ONNXImporter::parseMinMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1587 const std::string& layer_type = node_proto.op_type();
1588 layerParams.type = "Eltwise";
1589 layerParams.set("operation", layer_type == "Max" ? "max" : "min");
1590 addLayer(layerParams, node_proto);
1593 void ONNXImporter::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1595 layerParams.type = "Power";
1596 layerParams.set("scale", -1);
1597 addLayer(layerParams, node_proto);
1600 void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1602 CV_Assert(node_proto.input_size() == 0);
1603 CV_Assert(layerParams.blobs.size() == 1);
1604 addConstant(node_proto.output(0), layerParams.blobs[0]);
1607 void transformBlobs(std::vector<Mat>& blobs)
1612 std::vector<Mat> cudaWorkaround;
1613 cudaWorkaround.push_back(Wx.clone());
1614 cudaWorkaround.push_back(Wh.clone());
1615 cudaWorkaround.push_back(b.clone());
1617 const int numHidden = Wh.size[2];
1620 h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
1622 c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
1624 b = b.reshape(1, b.size[0]);
1625 Mat bx = b.colRange(0, b.cols / 2);
1626 Mat bh = b.colRange(b.cols / 2, b.cols);
1629 auto toIFOC = [] (Mat& in) {
1630 int first = in.size[0];
1631 int rest = in.total() / first / 4;
1632 // every weight blob contains weights for Input, Output, Forget and Cell gates
1633 Mat m = in.reshape(1, {first, 4, rest});
1634 Mat outputGate = m.col(1);
1635 Mat forgetGate = m.col(2);
1636 std::swap_ranges(outputGate.begin<float>(), outputGate.end<float>(), forgetGate.begin<float>());
1643 Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
1644 Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
1648 blobs[2] = b.reshape(1, 1);
1652 if (blobs.size() == 5) {
1653 // so that future patch removing copies can leave all indexing as is
1654 blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end());
1659 blobs[5] = P.colRange(0, numHidden);
1660 blobs[5] = blobs[5].clone().reshape(1, blobs[5].total()); // Single column.
1661 blobs[5] = Mat::diag(blobs[5]);
1663 blobs.push_back(P.colRange(numHidden, 2 * numHidden));
1664 blobs[6] = blobs[6].clone().reshape(1, blobs[6].total()); // Single column.
1665 blobs[6] = Mat::diag(blobs[6]);
1667 blobs.push_back(P.colRange(2 * numHidden, 3 * numHidden));
1668 blobs[7] = blobs[7].clone().reshape(1, blobs[7].total()); // Single column.
1669 blobs[7] = Mat::diag(blobs[7]);
1671 // so that future patch removing copies can leave all indexing as is
1672 blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end());
1675 void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size)
1677 MatShape blobShape(blobShape_, blobShape_ + size);
1679 if (idx < lstm_proto.input_size() && !lstm_proto.input(idx).empty())
1681 blob = getBlob(lstm_proto, idx);
1682 CV_Assert(shape(blob) == blobShape);
1686 blob = Mat(blobShape, CV_32FC1, 0.);
1688 layerParams.blobs.push_back(blob);
1691 void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n)
1693 LayerParams reshapeLp;
1694 reshapeLp.name = cv::format("%s/reshape", input_name.c_str());
1695 reshapeLp.type = "Reshape";
1696 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1698 reshapeLp.set("dim", DictValue::arrayInt(layerShape, n));
1700 opencv_onnx::NodeProto reshape_proto;
1701 reshape_proto.add_input(input_name);
1702 reshape_proto.add_output(output_name);
1703 addLayer(reshapeLp, reshape_proto);
1706 std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n)
1708 LayerParams sliceLP;
1709 sliceLP.name = cv::format("%s/slice_%d", input_name.c_str(), index);
1710 sliceLP.type = "Slice";
1711 CV_Assert(layer_id.find(sliceLP.name) == layer_id.end());
1713 sliceLP.set("begin", DictValue::arrayInt(begin, n));
1714 sliceLP.set("end", DictValue::arrayInt(end, n));
1715 sliceLP.set("axis", 0);
1717 opencv_onnx::NodeProto slice_proto;
1718 slice_proto.add_input(input_name);
1719 slice_proto.add_output(sliceLP.name);
1720 addLayer(sliceLP, slice_proto);
1722 return slice_proto.output(0);
1725 std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
1726 int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
1729 std::string reshape_output = cv::format("%s/reshape_%d", layerParams.name.c_str(), index);
1731 // reshape from Seq, Batch, Dirs*Hidden to Seq, Batch, Dirs, Hidden
1732 // to not confuse reshape with dynamic first dimension, zero means 'leave unchanged'
1733 int layerShape[] = {0, batch_size, num_directions, hidden_size};
1734 lstm_add_reshape(lstm_proto.output(index), reshape_output, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
1736 // permute from Seq, Batch, Dirs, Hidden to Seq, Dirs, Batch, Hidden
1737 LayerParams permuteLP;
1738 permuteLP.name = reshape_output + "/permute";
1739 permuteLP.type = "Permute";
1740 CV_Assert(layer_id.find(permuteLP.name) == layer_id.end());
1742 int order[] = {0, 2, 1, 3};
1743 permuteLP.set("order", DictValue::arrayInt(order, 4));
1745 opencv_onnx::NodeProto permute_proto;
1746 permute_proto.add_input(reshape_output);
1747 permute_proto.add_output((need_y && index == 0) ? y_name : static_cast<std::string>(permuteLP.name));
1748 addLayer(permuteLP, permute_proto);
1750 return permute_proto.output(0);
1753 void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size,
1754 int index, const std::string& input_name, const std::string& output_name)
1756 if (num_directions == 1)
1758 // Slice: Yh = Y[-1, :, :, :]
1759 int begin[] = {-1}, end[] = {INT_MAX};
1760 std::string slice_output = lstm_add_slice(index, input_name, begin, end, sizeof(begin) / sizeof(begin[0]));
1762 // Reshape: 1x1xBxH -> 1xBxH
1763 int layerShape[] = {1, batch_size, hidden_size};
1764 lstm_add_reshape(slice_output, output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
1768 // Slice: SxDxBxH -> last sequence, first direction
1769 int begin0[] = {-1, 0}, end0[] = {INT_MAX, 1};
1770 std::string slice_0 = lstm_add_slice(0, input_name, begin0, end0, sizeof(begin0) / sizeof(begin0[0]));
1772 // Slice: SxDxBxH -> first sequence, last direction
1773 int begin1[] = {0, -1}, end1[] = {1, INT_MAX};
1774 std::string slice_1 = lstm_add_slice(1, input_name, begin1, end1, sizeof(begin1) / sizeof(begin1[0]));
1776 LayerParams concatLP;
1777 concatLP.name = cv::format("%s/concat", input_name.c_str());
1778 concatLP.type = "Concat";
1779 CV_Assert(layer_id.find(concatLP.name) == layer_id.end());
1781 concatLP.set("axis", 1); // 1x1xBxH -> 1x2xBxH
1783 opencv_onnx::NodeProto concat_proto;
1784 concat_proto.add_input(slice_0);
1785 concat_proto.add_input(slice_1);
1786 concat_proto.add_output(concatLP.name);
1787 addLayer(concatLP, concat_proto);
1789 // Reshape: 1x2xBxH -> 2xBxH
1790 int layerShape[] = {2, batch_size, hidden_size};
1791 lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
1795 void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1797 opencv_onnx::NodeProto lstm_proto = node_proto_;
1798 layerParams.name += "/lstm";
1800 // https://github.com/onnx/onnx/blob/main/docs/Operators.md#LSTM
1801 CV_Assert(lstm_proto.input_size() >= 3);
1802 for (size_t i = 1; i < 3; ++i)
1804 const std::string& name = lstm_proto.input(i);
1805 CV_Assert(!name.empty() && constBlobs.count(name) == 1);
1808 IterShape_t shapeIt = outShapes.find(lstm_proto.input(0));
1809 CV_Assert(shapeIt != outShapes.end());
1810 const MatShape x_shape = shapeIt->second;
1812 const int seq_length = x_shape[0];
1813 const int batch_size = x_shape[1];
1814 const int input_size = x_shape[2];
1815 const int hidden_size = layerParams.get<int>("hidden_size");
1816 const int num_directions = constBlobs[lstm_proto.input(1)].size[0];
1818 int w_size[] = {num_directions, 4*hidden_size, input_size};
1819 lstm_extractConsts(layerParams, lstm_proto, 1, w_size, sizeof(w_size) / sizeof(w_size[0])); // W
1821 int r_size[] = {num_directions, 4*hidden_size, hidden_size};
1822 lstm_extractConsts(layerParams, lstm_proto, 2, r_size, sizeof(r_size) / sizeof(r_size[0])); // R
1824 int b_size[] = {num_directions, 8*hidden_size};
1825 lstm_extractConsts(layerParams, lstm_proto, 3, b_size, sizeof(b_size) / sizeof(b_size[0])); // B
1827 if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty())
1829 Mat blob = getBlob(lstm_proto, 4);
1830 CV_Assert(blob.total() == batch_size);
1831 for (MatIterator_<int32_t> it = blob.begin<int32_t>(); it != blob.end<int32_t>(); ++it)
1833 CV_Assert(*it == seq_length);
1837 int h_size[] = {num_directions, batch_size, hidden_size};
1838 lstm_extractConsts(layerParams, lstm_proto, 5, h_size, sizeof(h_size) / sizeof(h_size[0])); // initial_h
1840 int c_size[] = {num_directions, batch_size, hidden_size};
1841 lstm_extractConsts(layerParams, lstm_proto, 6, c_size, sizeof(c_size) / sizeof(c_size[0])); // initial_c
1843 if (lstm_proto.input_size() > 7 && !lstm_proto.input(7).empty())
1845 layerParams.set("use_peephole", true);
1846 int p_size[] = {num_directions, 3 * hidden_size};
1847 lstm_extractConsts(layerParams, lstm_proto, 7, p_size, sizeof(p_size) / sizeof(p_size[0])); // P
1850 transformBlobs(layerParams.blobs);
1852 layerParams.set("is_onnx", true);
1853 layerParams.set("reverse", layerParams.get<String>("direction", "") == "reverse");
1854 layerParams.set("bidirectional", layerParams.get<String>("direction", "") == "bidirectional");
1856 bool need_yc = lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty();
1857 bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty();
1858 bool need_y = lstm_proto.output_size() > 0 && !lstm_proto.output(0).empty();
1860 const std::string y_name = need_y ? lstm_proto.output(0) : "";
1861 const std::string yh_name = need_yh ? lstm_proto.output(1) : "";
1862 const std::string yc_name = need_yc ? lstm_proto.output(2) : "";
1864 layerParams.set("produce_cell_output", need_yc);
1866 lstm_proto.clear_output();
1867 if (need_y || need_yh)
1869 // give random names to LSTMLayer's outputs because every output needs postprocessing
1870 lstm_proto.add_output(cv::format("%s_y", layerParams.name.c_str()));
1874 lstm_proto.add_output(yc_name);
1877 addLayer(layerParams, lstm_proto);
1879 std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y,
1883 lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name);
1887 void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1889 opencv_onnx::NodeProto node_proto = node_proto_;
1890 const std::string output_name = node_proto.output(0);
1891 LayerParams gruParams = layerParams;
1892 gruParams.name += "/gru";
1894 // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#
1895 CV_Assert(node_proto.input_size() == 6);
1896 Mat Wx = getBlob(node_proto, 1);
1897 Mat Wh = getBlob(node_proto, 2);
1898 Mat b = getBlob(node_proto, 3);
1899 Mat h0 = getBlob(node_proto, 5);
1901 Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
1902 Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
1903 h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
1904 b = b.reshape(1, b.size[0]);
1906 gruParams.blobs.resize(4);
1907 gruParams.blobs[0] = Wh;
1908 gruParams.blobs[1] = Wx;
1909 gruParams.blobs[2] = b;
1910 gruParams.blobs[3] = h0;
1911 gruParams.set("bidirectional", gruParams.get<String>("direction", "") == "bidirectional");
1913 node_proto.set_output(0, gruParams.name); // set different name so output shapes will be registered on that name
1914 addLayer(gruParams, node_proto);
1916 MatShape gruShape = outShapes[node_proto.output(0)];
1918 // Add fake 1 as it is done in ONNX
1919 gruShape.insert(gruShape.begin() + 1, 1);
1921 layerParams.type = "Reshape";
1922 layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size()));
1923 node_proto.set_input(0, gruParams.name); // redirect input to GRU
1924 node_proto.set_output(0, output_name); // keep origin GRU's name
1925 addLayer(layerParams, node_proto);
1928 void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1930 const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
1931 layerParams.erase("scale");
1933 if (layerParams.has("bias"))
1935 layerParams.type = "Scale";
1936 layerParams.blobs.push_back(
1937 Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale));
1939 layerParams.set("bias_term", true);
1940 Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
1941 for (int j = 0; j < bias.total(); j++) {
1942 bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
1944 layerParams.blobs.push_back(bias);
1945 layerParams.erase("bias");
1948 layerParams.set("scale", scale);
1949 layerParams.type = "Power";
1951 addLayer(layerParams, node_proto);
1954 void ONNXImporter::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1956 layerParams.type = "ReLU6";
1957 float min_value = -FLT_MAX, max_value = FLT_MAX;
1958 int input_size = node_proto.input_size();
1959 CV_Check(input_size, 1 <= input_size && input_size <= 3, "");
1961 if (input_size >= 2 && !node_proto.input(1).empty())
1963 if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
1964 min_value = getBlob(node_proto, 1).at<float>(0);
1966 CV_Error(Error::StsNotImplemented, "Non-constant min values in Clip are not supported");
1969 if (input_size == 3 && !node_proto.input(2).empty())
1971 if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
1972 max_value = getBlob(node_proto, 2).at<float>(0);
1974 CV_Error(Error::StsNotImplemented, "Non-constant max values in Clip are not supported");
1977 layerParams.set("min_value", layerParams.get<float>("min", min_value));
1978 layerParams.set("max_value", layerParams.get<float>("max", max_value));
1979 addLayer(layerParams, node_proto);
1982 void ONNXImporter::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1984 layerParams.type = "ReLU";
1985 layerParams.set("negative_slope", layerParams.get<float>("alpha", 0.01));
1986 addLayer(layerParams, node_proto);
1989 void ONNXImporter::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1991 layerParams.type = "ReLU";
1992 addLayer(layerParams, node_proto);
1995 void ONNXImporter::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1997 layerParams.type = "ELU";
1998 addLayer(layerParams, node_proto);
2001 void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2003 layerParams.type = "TanH";
2004 addLayer(layerParams, node_proto);
2007 void ONNXImporter::parseAbs(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2009 layerParams.type = "AbsVal";
2010 addLayer(layerParams, node_proto);
2013 void ONNXImporter::parseCompare(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2015 CV_Assert(node_proto.input_size() == 2);
2016 const std::string& layer_type = node_proto.op_type();
2018 bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
2019 bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
2021 if (is_const_0 || is_const_1)
2023 Mat blob = getBlob(node_proto, static_cast<int>(is_const_1));
2024 blob = blob.reshape(1, 1);
2025 layerParams.blobs.push_back(blob);
2028 layerParams.type = "Compare";
2030 if (layer_type == "Equal")
2031 layerParams.set("mode", "equal");
2032 else if (layer_type == "Greater")
2033 layerParams.set("mode", "greater");
2035 layerParams.set("mode", "less");
2036 addLayer(layerParams, node_proto);
2039 void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2041 layerParams.type = "PReLU";
2042 layerParams.blobs.push_back(getBlob(node_proto, 1));
2043 addLayer(layerParams, node_proto);
2046 void ONNXImporter::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2048 replaceLayerParam(layerParams, "size", "local_size");
2049 addLayer(layerParams, node_proto);
2052 void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2054 opencv_onnx::NodeProto node_proto = node_proto_;
2055 if (node_proto.input_size() != 3)
2056 CV_Error(Error::StsNotImplemented,
2057 "Expected input, scale, bias");
2059 layerParams.blobs.resize(4);
2060 layerParams.blobs[2] = getBlob(node_proto, 1); // weightData
2061 layerParams.blobs[3] = getBlob(node_proto, 2); // biasData
2062 layerParams.set("has_bias", true);
2063 layerParams.set("has_weight", true);
2065 // Get number of channels in input
2066 int size = layerParams.blobs[2].total();
2067 layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean
2068 layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std
2070 LayerParams mvnParams;
2071 mvnParams.name = layerParams.name + "/MVN";
2072 mvnParams.type = "MVN";
2073 mvnParams.set("eps", layerParams.get<float>("epsilon"));
2074 layerParams.erase("epsilon");
2077 int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams);
2079 IterLayerId_t layerId = layer_id.find(node_proto.input(0));
2080 CV_Assert(layerId != layer_id.end());
2081 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
2083 layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0)));
2084 outShapes[mvnParams.name] = outShapes[node_proto.input(0)];
2086 //Replace Batch Norm's input to MVN
2087 node_proto.set_input(0, mvnParams.name);
2088 layerParams.type = "BatchNorm";
2089 addLayer(layerParams, node_proto);
2092 void ONNXImporter::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2094 if (node_proto.input_size() != 5)
2095 CV_Error(Error::StsNotImplemented,
2096 "Expected input, scale, bias, mean and var");
2098 layerParams.type = "BatchNorm";
2099 replaceLayerParam(layerParams, "epsilon", "eps");
2100 replaceLayerParam(layerParams, "spatial", "use_global_stats");
2102 Mat meanData = getBlob(node_proto, 3);
2103 Mat stdData = getBlob(node_proto, 4);
2105 layerParams.blobs.push_back(meanData);
2106 layerParams.blobs.push_back(stdData);
2108 if (!node_proto.input(1).empty()) {
2109 layerParams.set("has_weight", true);
2110 layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData
2112 layerParams.set("has_weight", false);
2115 if (!node_proto.input(2).empty()) {
2116 layerParams.set("has_bias", true);
2117 layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData
2119 layerParams.set("has_bias", false);
2121 addLayer(layerParams, node_proto);
2124 // A * B + C = Y, we require that the dimension of A is [m, k], and the dimension of B is [n, k].
2125 // And the dim of output Y is [m, n]
2126 void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2128 CV_Assert(node_proto.input_size() >= 2);
2129 layerParams.type = "InnerProduct";
2130 Mat weights = getBlob(node_proto, 1);
2132 if (!layerParams.get<int>("transB", 0))
2134 transpose(weights, weights);
2136 layerParams.blobs.push_back(weights);
2138 if (node_proto.input_size() == 3) {
2139 Mat bias = getBlob(node_proto, 2);
2140 layerParams.blobs.push_back(bias);
2142 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2144 Mat inputBuf = getBlob(node_proto, 0);
2146 LayerParams constParams;
2147 constParams.name = node_proto.input(0);
2148 constParams.type = "Const";
2149 constParams.blobs.push_back(inputBuf);
2151 opencv_onnx::NodeProto proto;
2152 proto.add_output(constParams.name);
2153 addLayer(constParams, proto);
2156 layerParams.set("num_output", layerParams.blobs[0].size[0]);
2157 layerParams.set("bias_term", node_proto.input_size() == 3);
2158 addLayer(layerParams, node_proto);
2161 void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2163 CV_Assert(node_proto.input_size() == 2);
2164 layerParams.type = "InnerProduct";
2165 layerParams.set("bias_term", false);
2166 CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
2167 int firstInpDims = outShapes[node_proto.input(0)].size();
2170 if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
2172 Mat blob = getBlob(node_proto, 1);
2173 secondInpDims = blob.dims;
2174 layerParams.blobs.push_back(blob.t());
2175 layerParams.set("num_output", layerParams.blobs[0].size[0]);
2177 secondInpDims = outShapes[node_proto.input(1)].size();
2179 layerParams.set("axis", firstInpDims - secondInpDims + 1);
2180 addLayer(layerParams, node_proto);
2183 void findBroadAxis(const MatShape& broadShape, const MatShape& outShape, size_t& axis, int& broadAxis)
2185 const size_t diff = outShape.size() - broadShape.size();
2187 // find the first non-one element of the broadcasting shape
2189 for (; axis < broadShape.size() && broadShape[axis] == 1; ++axis) {}
2191 // find the last non-one element of the broadcasting shape
2192 size_t endAxis = broadShape.size();
2193 for (; endAxis > axis && broadShape[endAxis - 1] == 1; --endAxis) {}
2195 // find one between axis and endAxis - as it needs to be broadcasted,
2196 // dimensions from the left of axis and from the right of endAxis will be handled by Scale layer
2198 for (size_t i = axis; i < endAxis; ++i)
2200 size_t outAxis = i + diff;
2201 if (outShape[outAxis] == broadShape[i])
2206 // ensure we need to broadcast only 1 dimension in the middle
2207 CV_Assert(broadShape[i] == 1 && broadAxis == -1);
2208 broadAxis = static_cast<int>(outAxis);
2215 void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2217 opencv_onnx::NodeProto node_proto = node_proto_;
2218 const std::string& layer_type = node_proto.op_type();
2219 const std::string output_name = node_proto.output(0);
2220 CV_Assert(node_proto.input_size() == 2);
2222 bool isDiv = layer_type == "Div";
2224 bool haveVariables = false;
2225 for (int i = 0; i < 2; ++i)
2227 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
2230 haveVariables = true;
2232 if (constId != -1 && haveVariables)
2234 Mat blob = getBlob(node_proto, constId);
2235 blob = blob.reshape(1, 1);
2236 if (blob.total() == 1) {
2237 float blob_value = blob.ptr<float>()[0];
2238 float coeff = blob_value;
2241 coeff = 1.f / blob_value;
2244 // Power layer calculates (x*scale + shift)^power, so const/x -> (x * (1/const) + 0)^(-1)
2245 layerParams.set("power", -1.f);
2248 layerParams.set("scale", coeff);
2249 layerParams.type = "Power";
2253 divide(1.0, blob, blob);
2254 layerParams.blobs.push_back(blob);
2255 layerParams.type = "Scale";
2258 else if (!haveVariables)
2260 Mat inp0 = getBlob(node_proto, 0);
2261 Mat inp1 = getBlob(node_proto, 1);
2263 if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1))
2264 CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str()));
2266 if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims)
2268 if (inp0.dims < inp1.dims)
2270 inp0 = inp0.reshape(1, inp1.dims, inp1.size);
2271 inp0.dims = inp1.dims;
2275 inp1 = inp1.reshape(1, inp0.dims, inp0.size);
2276 inp1.dims = inp0.dims;
2281 if (inp0.total() != inp1.total())
2283 if (inp0.total() == 1)
2285 float inp0_value = inp0.ptr<float>()[0];
2286 float coeff = isDiv ? 1.0 / inp0_value : inp0_value;
2287 multiply(inp1, coeff, out);
2291 float inp1_value = inp1.ptr<float>()[0];
2292 float coeff = isDiv ? 1.0 / inp1_value : inp1_value;
2293 multiply(inp0, coeff, out);
2299 out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
2302 if (inp0.dims == 1 && inp1.dims == 1)
2303 out.dims = 1; // to workaround dims == 1
2304 addConstant(output_name, out);
2307 else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
2309 layerParams.type = "Eltwise";
2310 layerParams.set("operation", isDiv ? "div" : "prod");
2314 // Scale layer allocate output with the first input shape
2315 if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)]))
2317 opencv_onnx::NodeProto proto;
2318 proto.add_input(node_proto.input(1));
2319 proto.add_input(node_proto.input(0));
2320 proto.add_output(output_name);
2326 LayerParams powerParams;
2327 powerParams.name = layerParams.name + "/inv";
2328 powerParams.type = "Power";
2329 powerParams.set("power", -1);
2331 //Create Power layer
2332 int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
2334 IterLayerId_t layerId = layer_id.find(node_proto.input(1));
2335 CV_Assert(layerId != layer_id.end());
2336 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
2338 layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
2339 outShapes[powerParams.name] = outShapes[node_proto.input(1)];
2341 //Replace input to Power
2342 node_proto.set_input(1, powerParams.name);
2345 const MatShape& broadShape = outShapes[node_proto.input(1)];
2346 const MatShape& outShape = outShapes[node_proto.input(0)];
2350 findBroadAxis(broadShape, outShape, axis, broadAxis);
2352 // if there is a one dimension in the middle that should be broadcasted, broadcast it
2353 if (broadAxis != -1)
2355 opencv_onnx::NodeProto concat_node_proto = node_proto;
2356 const std::string& input1 = concat_node_proto.input(1);
2358 expandMid(layerParams.name, concat_node_proto, input1, outShape[broadAxis]);
2360 LayerParams concatLP;
2361 concatLP.name = layerParams.name + "/concat";
2362 concatLP.set("axis", broadAxis);
2363 concatLP.type = "Concat";
2364 concat_node_proto.set_output(0, concatLP.name);
2366 addLayer(concatLP, concat_node_proto);
2367 node_proto.set_input(1, concatLP.name);
2370 CV_Assert(axis != outShape.size());
2371 layerParams.set("axis", static_cast<int>(axis));
2372 layerParams.type = "Scale";
2374 addLayer(layerParams, node_proto);
2377 void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2379 opencv_onnx::NodeProto node_proto = node_proto_;
2380 CV_Assert(node_proto.input_size() >= 2);
2381 layerParams.type = "Convolution";
2382 for (int j = 1; j < node_proto.input_size(); j++) {
2383 if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
2385 layerParams.blobs.push_back(getBlob(node_proto, j));
2388 int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
2389 layerParams.set("num_output", outCn);
2391 // Check for asymmetric padding in Conv2D
2392 if (layerParams.has("pad"))
2394 bool asymmetricPadding = false;
2395 DictValue pads = layerParams.get("pad");
2396 const int dims = pads.size() / 2;
2397 for (int i = 0; i < dims; ++i)
2399 if (pads.get<int>(i) != pads.get<int>(i + dims))
2401 asymmetricPadding = true;
2405 if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r]
2407 layerParams.erase("pad");
2408 // No paddings required for N, C axis
2409 std::vector<int> paddings(4, 0);
2410 // Add paddings for H, W axis
2411 for (int i = 0; i < dims; ++i)
2413 paddings.push_back(pads.get<int>(i));
2414 paddings.push_back(pads.get<int>(dims + i));
2417 padLp.name = layerParams.name + "/pad";
2418 padLp.type = "Padding";
2419 padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
2421 opencv_onnx::NodeProto proto;
2422 proto.add_input(node_proto.input(0));
2423 proto.add_output(padLp.name);
2425 addLayer(padLp, proto);
2426 node_proto.set_input(0, padLp.name);
2429 addLayer(layerParams, node_proto);
2432 void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2434 CV_Assert(node_proto.input_size() >= 2);
2435 layerParams.type = "Deconvolution";
2436 for (int j = 1; j < node_proto.input_size(); j++) {
2437 layerParams.blobs.push_back(getBlob(node_proto, j));
2439 layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
2440 layerParams.set("bias_term", node_proto.input_size() == 3);
2442 if (!layerParams.has("kernel_size"))
2443 CV_Error(Error::StsNotImplemented,
2444 "Required attribute 'kernel_size' is not present.");
2446 if (layerParams.has("output_shape"))
2448 const DictValue& outShape = layerParams.get("output_shape");
2449 DictValue strides = layerParams.get("stride");
2450 DictValue kernel = layerParams.get("kernel_size");
2453 std::vector<int> adjust_pads;
2454 if (layerParams.has("pad_mode"))
2456 padMode = toUpperCase(layerParams.get<String>("pad_mode"));
2457 if (padMode != "SAME" && padMode != "VALID")
2458 CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
2460 for (int i = 0; i < strides.size(); i++)
2462 int sz = outShape.get<int>(2 + i);
2463 int stride = strides.get<int>(i);
2464 adjust_pads.push_back(padMode == "SAME"? (sz - 1) % stride :
2465 (sz - kernel.get<int>(i)) % stride);
2467 layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
2470 else if (layerParams.has("output_padding"))
2472 replaceLayerParam(layerParams, "output_padding", "adj");
2474 addLayer(layerParams, node_proto);
2477 void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2479 int depth = layerParams.get<int>("depth", CV_32F);
2480 layerParams.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
2481 replaceLayerParam(layerParams, "perm", "order");
2482 if (!layerParams.has("order")) {
2483 MatShape inpShape = outShapes[node_proto.input(0)];
2484 size_t dims = inpShape.size();
2485 std::vector<int> perm(dims);
2486 for (size_t d = 0; d < dims; ++d)
2488 perm[d] = static_cast<int>(dims - 1 - d);
2490 layerParams.set("order", DictValue::arrayInt(perm.data(), perm.size()));
2493 CV_Assert(node_proto.input_size() == 1);
2494 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2496 std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
2497 runLayer(layerParams, inputs, transposed);
2498 CV_Assert(transposed.size() == 1);
2499 addConstant(node_proto.output(0), transposed[0]);
2502 addLayer(layerParams, node_proto);
2505 void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2507 CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
2508 DictValue axes_dict = layerParams.get("axes");
2509 MatShape inpShape = outShapes[node_proto.input(0)];
2511 std::vector<bool> maskedAxes(inpShape.size(), false);
2512 for (int i = 0; i < axes_dict.size(); ++i)
2514 int axis = axes_dict.getIntValue(i);
2515 CV_CheckLE(axis, static_cast<int>(inpShape.size()), "Squeeze axis");
2516 maskedAxes[axis] = inpShape[axis] == 1;
2519 for (int i = 0; i < inpShape.size(); ++i)
2522 outShape.push_back(inpShape[i]);
2524 if (outShape.size() != inpShape.size())
2526 layerParams.type = "Reshape";
2527 layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
2528 if (hasDynamicShapes)
2530 std::vector<int> dynamicAxes;
2531 std::vector<int> inputIndices;
2532 for (int index = 0; index < inpShape.size(); ++index)
2534 if (!maskedAxes[index])
2535 inputIndices.push_back(index);
2537 for (int index = 0; index < outShape.size(); ++index)
2538 dynamicAxes.push_back(index);
2539 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2540 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2544 layerParams.type = "Identity";
2546 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2548 Mat inp = getBlob(node_proto, 0);
2549 Mat out = inp.reshape(1, outShape);
2550 out.dims = outShape.size(); // to workaround dims == 1
2551 addConstant(node_proto.output(0), out);
2554 int depth = layerParams.get<int>("depth", CV_32F);
2555 layerParams.type += (depth == CV_8S) ? "Int8" : "";
2556 addLayer(layerParams, node_proto);
2559 void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2561 opencv_onnx::NodeProto node_proto = node_proto_;
2562 CV_CheckEQ(node_proto.input_size(), 1, "");
2563 int axis_ = layerParams.get<int>("axis", 1);
2564 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2566 Mat input = getBlob(node_proto, 0);
2567 int axis = normalize_axis(axis_, input.dims);
2569 int out_size[2] = {1, 1};
2570 for (int i = 0; i < axis; ++i)
2572 out_size[0] *= input.size[i];
2574 for (int i = axis; i < input.dims; ++i)
2576 out_size[1] *= input.size[i];
2579 Mat output = input.reshape(1, 2, out_size);
2580 addConstant(node_proto.output(0), output);
2583 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2584 CV_Assert(shapeIt != outShapes.end());
2585 MatShape inpShape = shapeIt->second;
2586 int axis = normalize_axis(axis_, inpShape.size());
2588 if (axis == 0 || axis == inpShape.size())
2590 LayerParams reshapeLp;
2591 reshapeLp.name = layerParams.name + "/reshape";
2592 reshapeLp.type = "Reshape";
2593 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
2595 inpShape.insert(axis == 0 ? inpShape.begin() : inpShape.end(), 1);
2596 reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2598 opencv_onnx::NodeProto proto;
2599 proto.add_input(node_proto.input(0));
2600 proto.add_output(reshapeLp.name);
2601 addLayer(reshapeLp, proto);
2602 node_proto.set_input(0, reshapeLp.name);
2606 LayerParams first_pass;
2607 first_pass.name = layerParams.name + "/flatten";
2608 CV_Assert(layer_id.find(first_pass.name) == layer_id.end());
2609 first_pass.type = "Flatten";
2610 first_pass.set("axis", 0);
2611 first_pass.set("end_axis", axis - 1);
2613 opencv_onnx::NodeProto proto;
2614 proto.add_input(node_proto.input(0));
2615 proto.add_output(first_pass.name);
2616 addLayer(first_pass, proto);
2618 layerParams.set("axis", 1);
2619 node_proto.set_input(0, first_pass.name);
2620 addLayer(layerParams, node_proto);
2623 void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2625 CV_Assert(node_proto.input_size() == 1 || node_proto.input_size() == 2);
2627 if (node_proto.input_size() == 2)
2629 Mat blob = getBlob(node_proto, 1);
2630 axes = DictValue::arrayInt(blob.ptr<int>(), blob.total());
2633 axes = layerParams.get("axes");
2635 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2638 Mat input = getBlob(node_proto, 0);
2640 std::vector<int> dims;
2641 for (int j = 0; j < input.dims; j++) {
2642 dims.push_back(input.size[j]);
2644 CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
2645 for (int j = 0; j < axes.size(); j++) {
2646 const int idx = axes.getIntValue(j);
2647 CV_Assert(idx <= dims.size());
2648 dims.insert(dims.begin() + idx, 1);
2651 Mat out = input.reshape(0, dims);
2652 addConstant(node_proto.output(0), out);
2657 if (axes.size() != 1)
2658 CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");
2660 int depth = layerParams.get<int>("depth", CV_32F);
2662 MatShape inpShape = outShapes[node_proto.input(0)];
2663 int axis = axes.getIntValue(0);
2664 CV_Assert(0 <= axis && axis <= inpShape.size());
2665 std::vector<int> outShape = inpShape;
2666 outShape.insert(outShape.begin() + axis, 1);
2667 layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape";
2668 layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
2669 if (hasDynamicShapes)
2671 std::vector<int> dynamicAxes;
2672 std::vector<int> inputIndices;
2673 for (int index = 0; index < outShape.size(); ++index) {
2675 dynamicAxes.push_back(index);
2677 for (int index = 0; index < inpShape.size(); ++index)
2678 inputIndices.push_back(index);
2679 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2680 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2682 addLayer(layerParams, node_proto);
2685 void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2687 opencv_onnx::NodeProto node_proto = node_proto_;
2688 CV_CheckEQ(node_proto.input_size(), 2, "");
2689 const std::string& input0 = node_proto.input(0);
2690 const std::string& input1 = node_proto.input(1);
2691 const std::string output_name = node_proto.output(0);
2692 Mat newShapeMat = getBlob(input1);
2693 MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());
2696 bool haveVariables = constBlobs.find(input0) == constBlobs.end();
2699 IterShape_t shapeIt = outShapes.find(input0);
2700 CV_Assert(shapeIt != outShapes.end());
2701 inpShape = shapeIt->second;
2705 inpShape = shape(getBlob(input0));
2708 String srcName = input0;
2709 // Unsqueeze and repeat along new axis
2710 if (targetShape.size() == inpShape.size() + 1)
2712 inpShape.insert(inpShape.begin(), targetShape.size() - inpShape.size(), 1);
2713 for (int i = 0; i < targetShape.size(); i++)
2715 if (abs(targetShape[i]) == 1)
2716 targetShape[i] = inpShape[i];
2720 LayerParams reshapeLp;
2721 reshapeLp.name = layerParams.name + "/reshape";
2722 reshapeLp.type = "Reshape";
2723 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
2724 reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2726 opencv_onnx::NodeProto proto;
2727 proto.add_input(node_proto.input(0));
2728 proto.add_output(reshapeLp.name);
2729 addLayer(reshapeLp, proto);
2730 srcName = reshapeLp.name;
2733 CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
2735 std::vector<int> broadcast_axes;
2736 // shapes aren't right-aligned here because targetShape.size() == inpShape.size()
2737 for (int i = 0; i < targetShape.size(); i++)
2739 if (targetShape[i] != inpShape[i])
2741 if (inpShape[i] == 1)
2743 broadcast_axes.push_back(i);
2745 else if (targetShape[i] != 1)
2747 CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
2754 if (broadcast_axes.size() > 1)
2755 CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");
2757 if (broadcast_axes.empty())
2759 addConstant(output_name, getBlob(node_proto, 0));
2763 Mat input = getBlob(node_proto, 0);
2764 input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
2765 Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
2766 output = output.reshape(0, targetShape);
2767 addConstant(output_name, output);
2771 if (broadcast_axes.size() == 2 &&
2772 broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
2774 LayerParams constParams;
2775 constParams.name = layerParams.name + "/const";
2776 CV_Assert(layer_id.find(constParams.name) == layer_id.end());
2777 constParams.type = "Const";
2779 Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F);
2780 constParams.blobs.push_back(inp);
2782 opencv_onnx::NodeProto proto;
2783 proto.add_output(constParams.name);
2784 addLayer(constParams, proto);
2786 layerParams.type = "Scale";
2787 layerParams.set("bias_term", false);
2788 node_proto.set_input(0, constParams.name);
2789 node_proto.set_input(1, srcName);
2791 else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
2793 expandMid(layerParams.name, node_proto, srcName, targetShape[broadcast_axes[0]]);
2795 layerParams.set("axis", broadcast_axes[0]);
2796 layerParams.type = "Concat";
2797 node_proto.set_output(0, output_name);
2799 else if (broadcast_axes.empty())
2801 layerParams.type = "Identity";
2804 CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
2805 addLayer(layerParams, node_proto);
2808 void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2810 CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
2811 int depth = layerParams.get<int>("depth", CV_32F);
2812 layerParams.type += (depth == CV_8S) ? "Int8" : "";
2814 if (node_proto.input_size() == 2) {
2815 Mat blob = getBlob(node_proto, 1);
2816 CV_Assert(blob.type() == CV_32SC1);
2818 layerParams.set("dim", DictValue::arrayInt<int*>(blob.ptr<int>(), blob.total()));
2820 if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
2821 std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
2822 runLayer(layerParams, inputs, outputs);
2823 addConstant(node_proto.output(0), outputs[0]);
2828 DictValue shape = layerParams.get("shape");
2829 std::vector<int> dim;
2830 for (int j = 0; j < shape.size(); j++) {
2831 dim.push_back(shape.getIntValue(j));
2834 if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
2835 Mat input = getBlob(node_proto, 0);
2836 Mat out = input.reshape(0, dim);
2837 addConstant(node_proto.output(0), out);
2840 replaceLayerParam(layerParams, "shape", "dim");
2842 addLayer(layerParams, node_proto);
2845 void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2847 int depth = layerParams.get<int>("depth", CV_32F);
2848 layerParams.type = (depth == CV_8S) ? "PaddingInt8" : "Padding";
2849 replaceLayerParam(layerParams, "mode", "type");
2850 if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
2852 // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
2853 // We need to shuffle it to begin0, end0, begin1, end1, ...
2854 Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
2855 paddings = paddings.t();
2856 layerParams.set("paddings", DictValue::arrayInt(paddings.ptr<int>(), paddings.total()));
2858 if (node_proto.input_size() == 3)
2860 Mat value = getBlob(node_proto, 2);
2861 float padValue = (depth == CV_8S) ? (float)value.ptr<int8_t>()[0] : value.ptr<float>()[0];
2862 layerParams.set("value", padValue);
2865 addLayer(layerParams, node_proto);
2868 void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2870 CV_Assert(node_proto.input_size() == 1);
2871 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2872 CV_Assert(shapeIt != outShapes.end());
2873 const MatShape& inpShape = shapeIt->second;
2875 int dims = static_cast<int>(inpShape.size());
2876 Mat shapeMat(dims, 1, CV_32S);
2877 bool isDynamicShape = false;
2878 for (int j = 0; j < dims; ++j)
2880 int sz = inpShape[j];
2881 isDynamicShape |= (sz == 0);
2882 shapeMat.at<int>(j) = sz;
2884 shapeMat.dims = 1; // FIXIT Mat 1D
2888 CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
2889 CV_Assert(!isDynamicShape); // not supported
2891 addConstant(node_proto.output(0), shapeMat);
2894 void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2896 if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2898 Mat blob = getBlob(node_proto, 0);
2900 switch (layerParams.get<int>("to"))
2902 case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break;
2903 case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break;
2904 case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break;
2905 case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
2906 case opencv_onnx::TensorProto_DataType_INT8:
2907 case opencv_onnx::TensorProto_DataType_INT16:
2908 case opencv_onnx::TensorProto_DataType_INT32:
2909 case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break;
2910 default: type = blob.type();
2913 blob.convertTo(dst, type);
2914 dst.dims = blob.dims;
2915 addConstant(node_proto.output(0), dst);
2919 layerParams.type = "Identity";
2920 addLayer(layerParams, node_proto);
2923 void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2927 if (!layerParams.blobs.empty())
2929 CV_Assert(!layerParams.has("value"));
2930 depth = layerParams.blobs[0].depth();
2932 layerParams.blobs[0].convertTo(floats, CV_32F);
2933 fill_value = floats.at<float>(0, 0);
2936 fill_value = layerParams.get("value", 0);
2938 MatShape inpShape = getBlob(node_proto, 0);
2939 for (int i = 0; i < inpShape.size(); i++)
2940 CV_CheckGT(inpShape[i], 0, "");
2941 Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
2942 addConstant(node_proto.output(0), tensor);
2945 void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2947 opencv_onnx::NodeProto node_proto = node_proto_;
2948 CV_Assert(node_proto.input_size() == 2);
2949 Mat indexMat = getBlob(node_proto, 1);
2950 CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
2951 int index = indexMat.at<int>(0);
2952 int axis = layerParams.get<int>("axis", 0);
2954 if ((constBlobs.find(node_proto.input(0)) != constBlobs.end()))
2956 Mat input = getBlob(node_proto, 0);
2958 std::vector<cv::Range> ranges(input.dims, Range::all());
2959 ranges[axis] = Range(index, index + 1);
2961 out = input(ranges);
2962 MatShape outShape = shape(out);
2963 if (outShape.size() > 1)
2965 outShape.erase(outShape.begin() + axis);
2966 out.reshape(0, outShape);
2970 addConstant(node_proto.output(0), out);
2975 IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2976 CV_Assert(shapeIt != outShapes.end());
2977 MatShape inpShape = shapeIt->second;
2979 LayerParams sliceLp;
2980 sliceLp.type = "Slice";
2981 sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name;
2982 std::vector<int> begin(inpShape.size(), 0);
2983 std::vector<int> end(inpShape.size(), INT_MAX);
2984 begin[axis] = index;
2985 end[axis] = index + 1;
2987 cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size());
2988 cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
2989 sliceLp.set("begin", paramBegin);
2990 sliceLp.set("end", paramEnd);
2991 sliceLp.set("has_dynamic_shapes", hasDynamicShapes);
2993 if (inpShape.size() > 1)
2995 opencv_onnx::NodeProto proto;
2996 proto.add_input(node_proto.input(0));
2997 proto.add_output(sliceLp.name);
2998 addLayer(sliceLp, proto);
3000 inpShape.erase(inpShape.begin() + axis);
3001 layerParams.type = "Reshape";
3002 layerParams.set("axis", 0);
3003 layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
3004 if (hasDynamicShapes)
3006 std::vector<int> dynamicAxes;
3007 std::vector<int> inputIndices;
3008 for (int index = 0; index < inpShape.size(); ++index)
3009 dynamicAxes.push_back(index);
3010 for (int index = 0; index < inpShape.size(); ++index)
3011 inputIndices.push_back(index);
3012 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
3013 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
3015 node_proto.set_input(0, sliceLp.name);
3019 layerParams = sliceLp;
3022 addLayer(layerParams, node_proto);
3025 void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3027 bool hasVariableInps = false;
3028 for (int i = 0; i < node_proto.input_size(); ++i)
3030 if (layer_id.find(node_proto.input(i)) != layer_id.end())
3032 hasVariableInps = true;
3037 if (!hasVariableInps)
3039 std::vector<Mat> inputs(node_proto.input_size()), concatenated;
3040 // Due constant folding we can get inputs with different number of dimensions
3041 // Insert the missing dimension to inputs
3042 MatShape inputShape;
3043 for (size_t i = 0; i < inputs.size(); ++i)
3045 inputs[i] = getBlob(node_proto, i);
3046 if (inputs[i].size.dims() > inputShape.size())
3048 inputShape = shape(inputs[i]);
3052 // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
3053 int axis = layerParams.get<int>("axis", 1);
3054 for (size_t i = 0; i < inputs.size(); ++i)
3056 MatShape targetShape = inputShape;
3057 targetShape[axis] = shape(inputs[i])[axis];
3058 CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
3059 inputs[i] = inputs[i].reshape(0, targetShape);
3061 runLayer(layerParams, inputs, concatenated);
3063 CV_Assert(concatenated.size() == 1);
3064 addConstant(node_proto.output(0), concatenated[0]);
3069 for (int i = 0; i < node_proto.input_size(); ++i)
3071 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3073 LayerParams constParams;
3074 constParams.name = node_proto.input(i);
3075 constParams.type = "Const";
3076 constParams.blobs.push_back(getBlob(node_proto, i));
3078 opencv_onnx::NodeProto proto;
3079 proto.add_output(constParams.name);
3080 addLayer(constParams, proto);
3084 addLayer(layerParams, node_proto);
3087 // https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
3088 void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3090 for (int i = 1; i < node_proto.input_size(); i++)
3091 CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
3093 int depth = layerParams.get<int>("depth", CV_32F);
3094 layerParams.type += (depth == CV_8S) ? "Int8" : "";
3096 if (layerParams.has("coordinate_transformation_mode"))
3098 String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
3099 CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
3101 layerParams.set("align_corners", interp_mode == "align_corners");
3102 if (layerParams.get<String>("mode") == "linear")
3104 layerParams.set("mode", interp_mode == "pytorch_half_pixel" || interp_mode == "half_pixel" ?
3105 "opencv_linear" : "bilinear");
3108 if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
3109 layerParams.set("mode", "opencv_linear");
3111 // opset-10: input = [X, scales]
3112 // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
3113 // opset-13: may have empty input, [X, "", "", sizes] or [x, "", scales]
3114 int scalesInputId = node_proto.input_size() == 2 ? 1 : 2;
3115 const std::string& scale_name = node_proto.input(scalesInputId);
3117 if(!scale_name.empty())
3118 scales = getBlob(node_proto, scalesInputId);
3120 if (!scales.empty())
3122 CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
3123 layerParams.set("zoom_factor_y", scales.at<float>(2));
3124 layerParams.set("zoom_factor_x", scales.at<float>(3));
3126 else if (node_proto.input_size() >= 4) // opset-11 [x, roi, scales, sizes] or opset-13: input = [X, "", "", sizes]
3128 const std::string& inputSizes = node_proto.input(3);
3129 if (constBlobs.find(inputSizes) != constBlobs.end())
3131 Mat shapes = getBlob(inputSizes);
3132 CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected");
3133 CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
3134 if (shapes.depth() == CV_32F)
3135 shapes.convertTo(shapes, CV_32S);
3136 layerParams.set("width", shapes.at<int>(3));
3137 layerParams.set("height", shapes.at<int>(2));
3141 CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str()));
3146 CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters");
3148 replaceLayerParam(layerParams, "mode", "interpolation");
3149 addLayer(layerParams, node_proto);
3152 void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3154 //fused from Resize Subgraph
3155 if (layerParams.has("coordinate_transformation_mode"))
3157 String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
3158 CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
3160 layerParams.set("align_corners", interp_mode == "align_corners");
3161 if (layerParams.get<String>("mode") == "linear")
3163 layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
3164 "opencv_linear" : "bilinear");
3167 if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
3168 layerParams.set("mode", "opencv_linear");
3170 layerParams.type = "Resize";
3171 if (layerParams.has("scales"))
3174 DictValue scales = layerParams.get("scales");
3175 CV_Assert(scales.size() == 4);
3176 layerParams.set("zoom_factor_y", scales.getIntValue(2));
3177 layerParams.set("zoom_factor_x", scales.getIntValue(3));
3179 else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
3182 replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
3183 replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
3188 const std::string& input1 = node_proto.input(1);
3189 if (constBlobs.find(input1) != constBlobs.end())
3191 Mat scales = getBlob(input1);
3192 CV_Assert(scales.total() == 4);
3193 layerParams.set("zoom_factor_y", scales.at<float>(2));
3194 layerParams.set("zoom_factor_x", scales.at<float>(3));
3197 replaceLayerParam(layerParams, "mode", "interpolation");
3198 addLayer(layerParams, node_proto);
3201 void ONNXImporter::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3203 const std::string& layer_type = node_proto.op_type();
3204 layerParams.type = "Softmax";
3205 layerParams.set("log_softmax", layer_type == "LogSoftmax");
3206 addLayer(layerParams, node_proto);
3209 void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3211 opencv_onnx::NodeProto node_proto = node_proto_;
3212 CV_CheckEQ(node_proto.input_size(), 3, "");
3213 if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
3215 Mat priors = getBlob(node_proto, 2);
3217 LayerParams constParams;
3218 constParams.name = layerParams.name + "/priors";
3219 constParams.type = "Const";
3220 constParams.blobs.push_back(priors);
3222 opencv_onnx::NodeProto priorsProto;
3223 priorsProto.add_output(constParams.name);
3224 addLayer(constParams, priorsProto);
3226 node_proto.set_input(2, constParams.name);
3228 addLayer(layerParams, node_proto);
3231 void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3233 layerParams.type = "CumSum";
3236 const std::string& input1 = node_proto.input(1);
3238 if (constBlobs.find(input1) != constBlobs.end())
3240 Mat axis_blob = getBlob(input1);
3241 CV_Assert(axis_blob.total() == 1u);
3242 layerParams.set("axis", axis_blob.at<int>(0));
3245 addLayer(layerParams, node_proto);
3248 void ONNXImporter::parseDepthToSpace(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3250 // We parse "DepthToSpace" and "SpaceToDepth" in this function.
3251 opencv_onnx::NodeProto node_proto = node_proto_;
3252 const std::string& layer_type = node_proto.op_type();
3253 CV_Assert(layer_type == "DepthToSpace" || layer_type == "SpaceToDepth");
3256 CV_Assert(layerParams.has("blocksize"));
3257 int blocksize = layerParams.get<int>("blocksize");
3258 CV_Assert(blocksize > 0);
3260 // Get mode, only for "DepthToSpace"
3261 std::string modeType = layerParams.get<std::string>("mode", "DCR");
3263 MatShape inpShape = outShapes[node_proto.input(0)];
3264 CV_Assert(inpShape.size() == 4);
3265 int N = inpShape[0], C = inpShape[1], H = inpShape[2], W = inpShape[3];
3267 // Implement DepthToSpace and SpaceToDepth by the Reshape and Permute layer.
3268 std::array<int, 6> shape0, perm;
3269 std::array<int, 4> shape1;
3271 if (layer_type == "DepthToSpace")
3273 if (modeType == "DCR")
3275 shape0 = {N, blocksize, blocksize, C/(blocksize * blocksize), H, W};
3276 perm = {0, 3, 4, 1, 5, 2};
3277 shape1 = {N, C/(blocksize * blocksize), H * blocksize, W * blocksize};
3279 else if (modeType == "CRD")
3281 shape0 = {N, C/(blocksize * blocksize), blocksize, blocksize, H, W};
3282 perm = {0, 1, 4, 2, 5, 3};
3283 shape1 = {N, C/(blocksize * blocksize), H * blocksize, W * blocksize};
3286 CV_Error(Error::StsNotImplemented, "The mode of " + modeType + " in " + layer_type + " Layer is not supported");
3288 else // SpaceToDepth
3290 shape0 = {N, C, H/blocksize, blocksize, W/blocksize, blocksize};
3291 perm = {0, 3, 5, 1, 2, 4};
3292 shape1 = {N, C * blocksize * blocksize, H/blocksize, W/blocksize};
3296 LayerParams reshapeLp;
3297 reshapeLp.name = layerParams.name + "/reshape";
3298 reshapeLp.type = "Reshape";
3299 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
3300 reshapeLp.set("dim", DictValue::arrayInt(shape0.data(), shape0.size()));
3302 opencv_onnx::NodeProto protoReshape;
3303 protoReshape.add_input(node_proto.input(0));
3304 protoReshape.add_output(reshapeLp.name);
3305 addLayer(reshapeLp, protoReshape);
3308 LayerParams permuteLp;
3309 permuteLp.name = layerParams.name + "/permute";
3310 permuteLp.type = "Permute";
3311 CV_Assert(layer_id.find(permuteLp.name) == layer_id.end());
3312 permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size()));
3314 opencv_onnx::NodeProto protoPermute;
3315 protoPermute.add_input(reshapeLp.name);
3316 protoPermute.add_output(permuteLp.name);
3317 addLayer(permuteLp, protoPermute);
3320 layerParams.type = "Reshape";
3321 layerParams.set("dim", DictValue::arrayInt(shape1.data(), shape1.size()));
3323 node_proto.set_input(0, permuteLp.name);
3324 addLayer(layerParams, node_proto);
3327 void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3329 for (int j = 0; j < node_proto.input_size(); j++) {
3330 if (layer_id.find(node_proto.input(j)) == layer_id.end())
3331 layerParams.blobs.push_back(getBlob(node_proto, j));
3333 addLayer(layerParams, node_proto);
3336 void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3338 const std::string& name = layerParams.name;
3339 std::string& layer_type = layerParams.type;
3340 const std::string& layer_type_domain = node_proto.has_domain() ? node_proto.domain() : std::string();
3341 if (!layer_type_domain.empty() && layer_type_domain != str_domain_ai_onnx)
3343 // append ONNX domain name
3344 static bool DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME = utils::getConfigurationParameterBool("OPENCV_DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME", true);
3345 if (DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME)
3347 layer_type = layer_type_domain + "." + layer_type;
3351 CV_LOG_IF_INFO(NULL, !LayerFactory::isLayerRegistered(layer_type), "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
3352 << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
3355 parseSimpleLayers(layerParams, node_proto);
3358 void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3360 CV_Assert(node_proto.input_size() == 3);
3361 layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
3363 if (node_proto.op_type() == "DequantizeLinear")
3365 Mat scale = getBlob(node_proto, 1);
3366 Mat zeropoint = getBlob(node_proto, 2);
3368 layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
3369 layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
3371 addLayer(layerParams, node_proto);
3374 void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3376 opencv_onnx::NodeProto node_proto = node_proto_;
3377 int ninputs = node_proto.input_size();
3378 CV_Assert(ninputs == 8 || ninputs == 9);
3380 Mat inp_sc = getBlob(node_proto, 1);
3381 Mat inp_zp = getBlob(node_proto, 2);
3383 if (layerParams.has("pad"))
3385 bool asymmetricPadding = false;
3386 DictValue pads = layerParams.get("pad");
3387 const int dims = pads.size() / 2;
3389 for (int i = 0; i < dims; ++i)
3391 if (pads.get<int>(i) != pads.get<int>(i + dims))
3393 asymmetricPadding = true;
3397 if (asymmetricPadding && pads.size() == 4)
3399 layerParams.erase("pad");
3400 std::vector<int> paddings(4, 0);
3401 for (int i = 0; i < dims; ++i)
3403 paddings.push_back(pads.get<int>(i));
3404 paddings.push_back(pads.get<int>(dims + i));
3407 padLp.name = layerParams.name + "/pad";
3408 padLp.type = "PaddingInt8";
3409 padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
3410 padLp.set("depth", CV_8S);
3411 padLp.set("value", inp_zp.at<int8_t>(0));
3413 opencv_onnx::NodeProto proto;
3414 proto.add_input(node_proto.input(0));
3415 proto.add_output(padLp.name);
3417 addLayer(padLp, proto);
3418 node_proto.set_input(0, padLp.name);
3422 Mat weights = getBlob(node_proto, 3);
3423 int outCn = weights.size[0];
3424 Mat w_scale = getBlob(node_proto, 4);
3425 CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
3426 bool per_channel = w_scale.total() == outCn ? true : false;
3427 Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
3429 Mat out_sc = getBlob(node_proto, 6);
3430 Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
3432 Mat weights_2d = weights.reshape(1, outCn);
3433 Mat biasFused(1, outCn, CV_32S);
3434 Mat outputMultiplier(1, outCn, CV_32F);
3435 for (int i = 0; i < outCn; i++)
3437 biasFused.at<int>(i) = bias.at<int>(i) - inp_zp.at<int8_t>(0)*(cv::sum(weights_2d.row(i))[0]);
3438 outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
3441 layerParams.type = "ConvolutionInt8";
3442 layerParams.set("num_output", outCn);
3443 layerParams.set("input_zeropoint", inp_zp.at<int8_t>(0));
3444 layerParams.set("input_scale",inp_sc.at<float>(0));
3445 layerParams.set("per_channel", per_channel);
3446 layerParams.blobs.push_back(weights);
3447 layerParams.blobs.push_back(biasFused);
3448 layerParams.blobs.push_back(outputMultiplier);
3449 addLayer(layerParams, node_proto);
3452 void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3454 int ninputs = node_proto.input_size();
3455 CV_Assert(ninputs == 8);
3457 if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
3458 CV_Error(Error::StsNotImplemented, "Variable weights is not supported");
3460 int firstInpDims = outShapes[node_proto.input(0)].size();
3462 Mat inp_sc = getBlob(node_proto, 1);
3463 Mat inp_zp = getBlob(node_proto, 2);
3465 Mat weights = getBlob(node_proto, 3).t();
3466 int outCn = weights.size[0];
3467 int secondInpDims = weights.dims;
3469 Mat w_scale = getBlob(node_proto, 4);
3470 CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
3471 bool per_channel = w_scale.total() == outCn ? true : false;
3472 Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
3473 Mat out_sc = getBlob(node_proto, 6);
3475 Mat bias(1, outCn, CV_32S);
3476 Mat outputMultiplier(1, outCn, CV_32F);
3477 for (int i = 0; i < outCn; i++)
3479 bias.at<int>(i) = -inp_zp.at<int8_t>(0)*(cv::sum(weights.row(i))[0]);
3480 outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
3483 layerParams.type = "InnerProductInt8";
3484 layerParams.set("num_output", outCn);
3485 layerParams.set("axis", firstInpDims - secondInpDims + 1);
3486 layerParams.set("input_scale", inp_sc.at<float>(0));
3487 layerParams.set("input_zeropoint", inp_zp.at<int8_t>(0));
3488 layerParams.set("per_channel", per_channel);
3490 layerParams.blobs.push_back(weights);
3491 layerParams.blobs.push_back(bias);
3492 layerParams.blobs.push_back(outputMultiplier);
3493 addLayer(layerParams, node_proto);
3496 void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3498 opencv_onnx::NodeProto node_proto = node_proto_;
3499 CV_Assert(node_proto.input_size() == 8);
3500 std::string op = (node_proto.op_type() == "QLinearAdd") ? "sum" : "prod";
3502 for (int i = 0; i < 4; i += 3)
3504 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3508 Mat inp_0_sc = getBlob(node_proto, 1);
3509 Mat inp_0_zp = getBlob(node_proto, 2);
3511 Mat inp_1_sc = getBlob(node_proto, 4);
3512 Mat inp_1_zp = getBlob(node_proto, 5);
3514 // Set 2nd input as the const input
3517 cv::swap(inp_0_sc, inp_1_sc);
3518 cv::swap(inp_0_zp, inp_1_zp);
3521 float out_sc = getBlob(node_proto, 6).at<float>(0);
3522 int8_t out_zp = getBlob(node_proto, 7).at<int8_t>(0);
3524 std::vector<float> inp_scales = {inp_0_sc.at<float>(0), inp_1_sc.at<float>(0)};
3525 std::vector<int8_t> inp_zps = {inp_0_zp.at<int8_t>(0), inp_1_zp.at<int8_t>(0)};
3527 std::vector<float> coeffs;
3531 coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc};
3532 offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1];
3536 coeffs = {inp_scales[0]/out_sc, inp_scales[1]};
3542 Mat blob = getBlob(node_proto, constId);
3543 if (blob.total() == 1)
3545 float val = inp_scales[1] * (blob.at<int8_t>(0) - inp_zps[1]);
3546 float scale = inp_scales[0] / out_sc;
3550 float shift = out_zp - scale*inp_zps[0];
3552 shift += (val/out_sc);
3554 LayerParams rescaleParams;
3555 rescaleParams.name = layerParams.name;
3556 rescaleParams.type = "Requantize";
3557 rescaleParams.set("depth", CV_8S);
3558 rescaleParams.set("scale", scale);
3559 rescaleParams.set("shift", shift);
3560 rescaleParams.set("isEltwise", true);
3561 addLayer(rescaleParams, node_proto);
3566 MatShape inpShape = outShapes[node_proto.input(3 - constId)];
3570 if (shape(blob) == inpShape)
3572 LayerParams constParams;
3573 constParams.name = layerParams.name + "/const";
3574 constParams.type = "ConstInt8";
3575 constParams.set("depth", CV_8S);
3576 constParams.set("scales", DictValue::arrayReal(inp_1_sc.ptr<float>(), 1));
3577 constParams.set("zeropoints", DictValue::arrayInt(inp_1_zp.ptr<int8_t>(), 1));
3578 constParams.blobs.push_back(blob);
3580 int id = dstNet.addLayer(constParams.name, constParams.type, CV_8S, constParams);
3581 layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
3582 outShapes[constParams.name] = shape(blob);
3583 node_proto.set_input(constId, constParams.name);
3585 layerParams.type = "EltwiseInt8";
3586 layerParams.set("operation", op);
3587 layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
3588 layerParams.set("offset", offset);
3592 layerParams.type = "ScaleInt8";
3593 layerParams.set("bias_term", op == "sum");
3595 for (int i = 0; i < graph_proto.initializer_size(); i++)
3597 opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
3598 if (tensor_proto.name() == node_proto.input(constId))
3600 axis = inpShape.size() - tensor_proto.dims_size();
3604 layerParams.set("axis", axis);
3605 blob = blob.reshape(1, 1);
3606 Mat blob_dequantized;
3607 blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1]));
3608 layerParams.blobs.push_back(blob_dequantized);
3612 else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)])
3614 layerParams.type = "EltwiseInt8";
3615 layerParams.set("operation", op);
3616 layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
3617 layerParams.set("offset", offset);
3621 layerParams.type = "ScaleInt8";
3622 layerParams.set("bias_term", op == "sum");
3625 layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
3626 layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size()));
3627 addLayer(layerParams, node_proto);
3630 void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3632 CV_Assert(node_proto.input_size() == 5);
3634 float slope = layerParams.get<float>("alpha");
3635 float inp_sc = getBlob(node_proto, 1).at<float>(0);
3636 int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3637 float out_sc = getBlob(node_proto, 3).at<float>(0);
3638 int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
3640 Mat lookUpTable(1, 256, CV_8S);
3641 int8_t* table = lookUpTable.ptr<int8_t>();
3642 for (int i = -128; i < 128; i++)
3644 float x = inp_sc*(i - inp_zp);
3645 float y = x >= 0.f ? x : slope*x;
3646 int quantized = out_zp + cvRound(y/out_sc);
3647 table[i+128] = saturate_cast<int8_t>(quantized);
3650 layerParams.type = "ReLUInt8";
3651 layerParams.set("input_scale", inp_sc);
3652 layerParams.set("input_zeropoint", inp_zp);
3653 layerParams.set("slope", slope);
3654 layerParams.blobs.push_back(lookUpTable);
3655 addLayer(layerParams, node_proto);
3658 void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3660 CV_Assert(node_proto.input_size() == 5);
3662 float inp_sc = getBlob(node_proto, 1).at<float>(0);
3663 int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3664 float out_sc = getBlob(node_proto, 3).at<float>(0);
3665 int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
3667 Mat lookUpTable(1, 256, CV_8S);
3668 int8_t* table = lookUpTable.ptr<int8_t>();
3669 for (int i = -128; i < 128; i++)
3671 float x = inp_sc*(i - inp_zp);
3672 float y = 1.f/(1.f + std::exp(-x));
3673 int quantized = out_zp + cvRound(y/out_sc);
3674 table[i+128] = saturate_cast<int8_t>(quantized);
3677 layerParams.type = "SigmoidInt8";
3678 layerParams.set("input_scale", inp_sc);
3679 layerParams.set("input_zeropoint", inp_zp);
3680 layerParams.blobs.push_back(lookUpTable);
3681 addLayer(layerParams, node_proto);
3684 void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3686 CV_Assert(node_proto.input_size() == 5);
3687 float inp_sc = getBlob(node_proto, 1).at<float>(0);
3688 int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3689 float out_sc = getBlob(node_proto, 3).at<float>(0);
3691 layerParams.type = "PoolingInt8";
3692 layerParams.set("pool", "ave");
3693 layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool");
3694 layerParams.set("multiplier", inp_sc/out_sc);
3695 layerParams.set("input_zeropoint", inp_zp);
3696 addLayer(layerParams, node_proto);
3699 void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3701 opencv_onnx::NodeProto node_proto = node_proto_;
3702 layerParams.type = "ConcatInt8";
3703 int num_inputs = node_proto.input_size();
3705 float out_scale = getBlob(node_proto, 0).at<float>(0);
3706 int out_zp = getBlob(node_proto, 1).at<int8_t>(0);
3708 for (int i = 2; i < num_inputs; i += 3)
3710 float inp_scale = getBlob(node_proto, i + 1).at<float>(0);
3711 int inp_zp = getBlob(node_proto, i + 2).at<int8_t>(0);
3713 if (inp_scale != out_scale || inp_zp != out_zp)
3715 float scale = inp_scale/out_scale;
3716 float shift = out_zp - scale*inp_zp;
3718 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3720 Mat blob = getBlob(node_proto, i);
3722 blob.convertTo(blob_rescaled, CV_8S, scale, shift);
3723 constBlobs[node_proto.input(i)] = blob_rescaled;
3727 LayerParams rescaleParams;
3728 rescaleParams.name = node_proto.input(i) + "/rescale";
3729 rescaleParams.type = "Requantize";
3730 rescaleParams.set("depth", CV_8S);
3731 rescaleParams.set("scale", scale);
3732 rescaleParams.set("shift", shift);
3733 rescaleParams.set("isEltwise", false);
3735 opencv_onnx::NodeProto proto;
3736 proto.add_input(node_proto.input(i));
3737 proto.add_output(rescaleParams.name);
3738 addLayer(rescaleParams, proto);
3739 node_proto.set_input(i, rescaleParams.name);
3744 bool hasVariableInps = false;
3745 for (int i = 2; i < num_inputs; i += 3)
3747 if (layer_id.find(node_proto.input(i)) != layer_id.end())
3749 hasVariableInps = true;
3754 if (!hasVariableInps)
3756 std::vector<Mat> inputs, concatenated;
3757 MatShape inputShape;
3758 for (size_t i = 2; i < num_inputs; i += 3)
3760 Mat blob = getBlob(node_proto, i);
3761 if (blob.size.dims() > inputShape.size())
3763 inputShape = shape(blob);
3765 inputs.push_back(blob);
3768 int axis = layerParams.get<int>("axis", 1);
3769 for (size_t i = 0; i < inputs.size(); ++i)
3771 MatShape targetShape = inputShape;
3772 targetShape[axis] = shape(inputs[i])[axis];
3773 CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
3774 inputs[i] = inputs[i].reshape(0, targetShape);
3776 runLayer(layerParams, inputs, concatenated);
3777 CV_Assert(concatenated.size() == 1);
3778 addConstant(layerParams.name, concatenated[0]);
3783 for (int i = 2; i < num_inputs; i += 3)
3785 if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3787 LayerParams constParams;
3788 constParams.name = node_proto.input(i);
3789 constParams.type = "ConstInt8";
3790 constParams.blobs.push_back(getBlob(node_proto, i));
3791 constParams.set("depth", CV_8S);
3793 opencv_onnx::NodeProto proto;
3794 proto.add_output(constParams.name);
3795 addLayer(constParams, proto);
3799 addLayer(layerParams, node_proto);
3802 // Domain: ai.onnx (default)
3803 // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
3804 void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
3806 CV_UNUSED(opset_version);
3807 DispatchMap dispatch;
3809 dispatch["ArgMax"] = dispatch["ArgMin"] = &ONNXImporter::parseArg;
3810 dispatch["MaxUnpool"] = &ONNXImporter::parseMaxUnpool;
3811 dispatch["MaxPool"] = &ONNXImporter::parseMaxPool;
3812 dispatch["AveragePool"] = &ONNXImporter::parseAveragePool;
3813 dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = &ONNXImporter::parseGlobalPool;
3814 dispatch["ReduceMax"] = dispatch["ReduceMin"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = dispatch["ReduceMax"] =
3815 dispatch["ReduceMin"] = dispatch["ReduceSumSquare"] = dispatch["ReduceProd"] = dispatch["ReduceL1"] =
3816 dispatch["ReduceL2"] = dispatch["ReduceLogSum"] = dispatch["ReduceLogSumExp"] = &ONNXImporter::parseReduce;
3817 dispatch["Slice"] = &ONNXImporter::parseSlice;
3818 dispatch["Split"] = &ONNXImporter::parseSplit;
3819 dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias;
3820 dispatch["Pow"] = &ONNXImporter::parsePow;
3821 dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseMinMax;
3822 dispatch["Neg"] = &ONNXImporter::parseNeg;
3823 dispatch["Constant"] = &ONNXImporter::parseConstant;
3824 dispatch["LSTM"] = &ONNXImporter::parseLSTM;
3825 dispatch["GRU"] = &ONNXImporter::parseGRU;
3826 dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler;
3827 dispatch["Clip"] = &ONNXImporter::parseClip;
3828 dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu;
3829 dispatch["Relu"] = &ONNXImporter::parseRelu;
3830 dispatch["Elu"] = &ONNXImporter::parseElu;
3831 dispatch["Tanh"] = &ONNXImporter::parseTanh;
3832 dispatch["Abs"] = &ONNXImporter::parseAbs;
3833 dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = &ONNXImporter::parseCompare;
3834 dispatch["PRelu"] = &ONNXImporter::parsePRelu;
3835 dispatch["LRN"] = &ONNXImporter::parseLRN;
3836 dispatch["InstanceNormalization"] = &ONNXImporter::parseInstanceNormalization;
3837 dispatch["BatchNormalization"] = &ONNXImporter::parseBatchNormalization;
3838 dispatch["Gemm"] = &ONNXImporter::parseGemm;
3839 dispatch["MatMul"] = &ONNXImporter::parseMatMul;
3840 dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseMul;
3841 dispatch["Conv"] = &ONNXImporter::parseConv;
3842 dispatch["ConvTranspose"] = &ONNXImporter::parseConvTranspose;
3843 dispatch["Transpose"] = &ONNXImporter::parseTranspose;
3844 dispatch["Squeeze"] = &ONNXImporter::parseSqueeze;
3845 dispatch["Flatten"] = &ONNXImporter::parseFlatten;
3846 dispatch["Unsqueeze"] = &ONNXImporter::parseUnsqueeze;
3847 dispatch["Expand"] = &ONNXImporter::parseExpand;
3848 dispatch["Reshape"] = &ONNXImporter::parseReshape;
3849 dispatch["Pad"] = &ONNXImporter::parsePad;
3850 dispatch["Shape"] = &ONNXImporter::parseShape;
3851 dispatch["Cast"] = &ONNXImporter::parseCast;
3852 dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill;
3853 dispatch["Gather"] = &ONNXImporter::parseGather;
3854 dispatch["Concat"] = &ONNXImporter::parseConcat;
3855 dispatch["Resize"] = &ONNXImporter::parseResize;
3856 dispatch["Upsample"] = &ONNXImporter::parseUpsample;
3857 dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax;
3858 dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
3859 dispatch["CumSum"] = &ONNXImporter::parseCumSum;
3860 dispatch["SpaceToDepth"] = dispatch["DepthToSpace"] = &ONNXImporter::parseDepthToSpace;
3862 std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
3863 "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
3864 "Identity", "Log", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh", "Softmax",
3865 "Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu"};
3866 for (const auto& name : simpleLayers)
3868 dispatch[name] = &ONNXImporter::parseSimpleLayers;
3871 // ai.onnx: opset 10+
3872 dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant;
3873 dispatch["QLinearConv"] = &ONNXImporter::parseQConv;
3874 dispatch["QLinearMatMul"] = &ONNXImporter::parseQMatMul;
3876 domain_dispatch_map[str_domain_ai_onnx] = dispatch;
3879 // Domain: com.microsoft
3880 // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
3881 void ONNXImporter::buildDispatchMap_COM_MICROSOFT(int opset_version)
3883 CV_UNUSED(opset_version);
3884 DispatchMap dispatch;
3886 dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter::parseQEltwise;
3887 dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter::parseQAvgPool;
3888 dispatch["QLinearLeakyRelu"] = &ONNXImporter::parseQLeakyRelu;
3889 dispatch["QLinearSigmoid"] = &ONNXImporter::parseQSigmoid;
3890 dispatch["QLinearConcat"] = &ONNXImporter::parseQConcat;
3892 domain_dispatch_map["com.microsoft"] = dispatch;
3896 Net readNetFromONNX(const String& onnxFile)
3898 return detail::readNetDiagnostic<ONNXImporter>(onnxFile.c_str());
3901 Net readNetFromONNX(const char* buffer, size_t sizeBuffer)
3903 return detail::readNetDiagnostic<ONNXImporter>(buffer, sizeBuffer);
3906 Net readNetFromONNX(const std::vector<uchar>& buffer)
3908 return readNetFromONNX(reinterpret_cast<const char*>(buffer.data()), buffer.size());
3911 Mat readTensorFromONNX(const String& path)
3913 std::fstream input(path.c_str(), std::ios::in | std::ios::binary);
3916 CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", path.c_str()));
3919 opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto();
3920 if (!tensor_proto.ParseFromIstream(&input))
3922 CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX data: %s", path.c_str()));
3924 Mat mat = getMatFromTensor(tensor_proto);
3925 releaseONNXTensor(tensor_proto);
3929 CV__DNN_INLINE_NS_END