modules/dnn/src/onnx/onnx_importer.cpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 // Copyright (C) 2018, Intel Corporation, all rights reserved.
   6 // Third party copyrights are property of their respective owners.
   7
   8 #include "../precomp.hpp"
   9 #include <opencv2/dnn/shape_utils.hpp>
  10
  11 #include <opencv2/dnn/layer_reg.private.hpp>
  12
  13 #include <opencv2/core/utils/logger.defines.hpp>
  14 #undef CV_LOG_STRIP_LEVEL
  15 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
  16 #include <opencv2/core/utils/logger.hpp>
  17
  18 #include <opencv2/core/utils/configuration.private.hpp>
  19
  20
  21 #ifdef HAVE_PROTOBUF
  22
  23 #include <iostream>
  24 #include <fstream>
  25 #include <string>
  26 #include <limits>
  27 #include <algorithm>
  28
  29 #if defined _MSC_VER && _MSC_VER < 1910/*MSVS 2017*/
  30 #pragma warning(push)
  31 #pragma warning(disable: 4503)  // decorated name length exceeded, name was truncated
  32 #endif
  33
  34 #if defined(__GNUC__) && __GNUC__ >= 5
  35 #pragma GCC diagnostic push
  36 #pragma GCC diagnostic ignored "-Wsuggest-override"
  37 #endif
  38 #include "opencv-onnx.pb.h"
  39 #if defined(__GNUC__) && __GNUC__ >= 5
  40 #pragma GCC diagnostic pop
  41 #endif
  42
  43 #include "onnx_graph_simplifier.hpp"
  44
  45 namespace cv {
  46 namespace dnn {
  47 CV__DNN_INLINE_NS_BEGIN
  48
  49 extern bool DNN_DIAGNOSTICS_RUN;
  50
  51 class ONNXLayerHandler;
  52
/** Builds a cv::dnn::Net from an ONNX model.
 *
 * Both constructors parse the protobuf model (from a file or from a memory buffer) into
 * model_proto and then call populateNet(). Individual ONNX nodes are translated by the
 * parse* member functions, which are looked up through per-domain dispatch maps.
 */
class ONNXImporter
{
    opencv_onnx::ModelProto model_proto;
    // Identifies one output of a layer added to the destination net:
    // the layer id and the index of the output within that layer.
    struct LayerInfo {
        int layerId;
        int outputId;
        LayerInfo(int _layerId = 0, int _outputId = 0) : layerId(_layerId), outputId(_outputId) {}
    };

    // Converts all graph initializers into Mats keyed by tensor name.
    std::map<std::string, Mat> getGraphTensors(
                                    const opencv_onnx::GraphProto& graph_proto);
    // Return the constant blob registered for the given node input / blob name;
    // an error is raised when the name is not present in constBlobs.
    Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
    Mat getBlob(const std::string& input_name);

    // Converts the attributes of a node into LayerParams entries.
    LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);

    // Registers a constant blob (constBlobs) together with its shape (outShapes).
    void addConstant(const std::string& name, const Mat& blob);
    // Adds a layer to dstNet, connects the inputs that are known in layer_id and
    // records the shapes of the layer outputs.
    void addLayer(LayerParams& layerParams,
                  const opencv_onnx::NodeProto& node_proto);
    void handleQuantizedNode(LayerParams& layerParams,
                             const opencv_onnx::NodeProto& node_proto);

    void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
                   const std::string& input, size_t n);
    void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id);
public:
    // Each constructor parses the model and immediately populates 'net'.
    ONNXImporter(Net& net, const char *onnxFile);
    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer);

    void populateNet();

protected:
    // Created only when DNN_DIAGNOSTICS_RUN is set; null otherwise.
    std::unique_ptr<ONNXLayerHandler> layerHandler;
    Net& dstNet;  // Network that receives the imported layers.

    opencv_onnx::GraphProto graph_proto;
    std::string framework_name;

    // Constant tensors by blob name (filled by addConstant(), read by getBlob()).
    std::map<std::string, Mat> constBlobs;

    std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
    bool hasDynamicShapes;  // Whether the model has inputs with dynamic shapes
    typedef std::map<std::string, MatShape>::iterator IterShape_t;

    // Maps a blob name to the layer output that produces it.
    std::map<std::string, LayerInfo> layer_id;
    typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;

    void handleNode(const opencv_onnx::NodeProto& node_proto);

private:
    friend class ONNXLayerHandler;
    // Common signature of all parse* node handlers declared below.
    typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    // Operator type (NodeProto::op_type) -> handler.
    typedef std::map<std::string, ONNXImporterNodeParser> DispatchMap;
    // Presumably keyed by operator domain name -- confirm against getDispatchMap().
    typedef std::map<std::string, DispatchMap> DomainDispatchMap;

    DomainDispatchMap domain_dispatch_map;
    std::string getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto);
    const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto);
    void buildDispatchMap_ONNX_AI(int opset_version);
    void buildDispatchMap_COM_MICROSOFT(int opset_version);

    // Domain: 'ai.onnx' (default)
    // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
    void parseArg                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMaxUnpool            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMaxPool              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseAveragePool          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseReduce               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSlice                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSplit                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBias                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePow                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMinMax               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseNeg                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConstant             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLSTM                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGRU                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseImageScaler          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseClip                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLeakyRelu            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseRelu                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseElu                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTanh                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseAbs                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCompare              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePRelu                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseLRN                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseBatchNormalization   (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGemm                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMatMul               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseMul                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConv                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConvTranspose        (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseTranspose            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSqueeze              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseFlatten              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUnsqueeze            (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseExpand               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseReshape              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parsePad                  (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseShape                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCast                 (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConstantFill         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseGather               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseConcat               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseResize               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseUpsample             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSoftMax              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseDetectionOutput      (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseCumSum               (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseSimpleLayers         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    // Domain: com.microsoft
    // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
    void parseQuantDequant         (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQConv                (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQMatMul              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQEltwise             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQLeakyRelu           (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQSigmoid             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQAvgPool             (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
    void parseQConcat              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    // '???' domain or '???' layer type
    // NOTE(review): presumably the fallback for nodes without a dedicated parser -- confirm in handleNode().
    void parseCustomLayer          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    int onnx_opset;  // OperatorSetIdProto for 'onnx' domain
    std::map<std::string, int> onnx_opset_map;  // map from OperatorSetIdProto
    // Reads model_proto.opset_import and builds the dispatch maps for the declared domains.
    void parseOperatorSet();

    // Name of the default operator domain; an empty domain is treated the same way in parseOperatorSet().
    const std::string str_domain_ai_onnx = "ai.onnx";
};
 186
/** Diagnostic helper that reports ONNX nodes for which the importer has no parser.
 *
 * ONNXImporter creates an instance only when DNN_DIAGNOSTICS_RUN is set.
 */
class ONNXLayerHandler : public detail::LayerHandler
{
public:
    explicit ONNXLayerHandler(ONNXImporter* importer_);

    // Registers every node of 'net' whose operator type is absent from the importer's
    // dispatch map as missing, then prints the collected list.
    void fillRegistry(const opencv_onnx::GraphProto& net);

protected:
    // Non-owning back-pointer; the importer holds this handler in a std::unique_ptr.
    ONNXImporter* importer;
};
 197
 198 ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){}
 199
 200 void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
 201 {
 202     int layersSize = net.node_size();
 203     for (int li = 0; li < layersSize; li++) {
 204         const opencv_onnx::NodeProto &node_proto = net.node(li);
 205         const std::string& name = node_proto.output(0);
 206         const std::string& type = node_proto.op_type();
 207         const std::string& layer_type_domain = importer->getLayerTypeDomain(node_proto);
 208         const auto& dispatch = importer->getDispatchMap(node_proto);
 209         if (dispatch.find(type) == dispatch.end())
 210         {
 211             addMissing(name, cv::format("%s.%s", layer_type_domain.c_str(), type.c_str()));
 212         }
 213     }
 214     printMissing();
 215 }
 216
 217 ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
 218     : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
 219     , dstNet(net)
 220     , onnx_opset(0)
 221 {
 222     hasDynamicShapes = false;
 223     CV_Assert(onnxFile);
 224     CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
 225
 226     std::fstream input(onnxFile, std::ios::in | std::ios::binary);
 227     if (!input)
 228     {
 229         CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
 230     }
 231
 232     if (!model_proto.ParseFromIstream(&input))
 233     {
 234         CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
 235     }
 236
 237     populateNet();
 238 }
 239
 240 ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
 241     : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
 242     , dstNet(net)
 243     , onnx_opset(0)
 244 {
 245     hasDynamicShapes = false;
 246     CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
 247
 248     struct _Buf : public std::streambuf
 249             {
 250         _Buf(const char* buffer, size_t sizeBuffer)
 251         {
 252             char* p = const_cast<char*>(buffer);
 253             setg(p, p, p + sizeBuffer);
 254         }
 255             };
 256
 257     _Buf buf(buffer, sizeBuffer);
 258     std::istream input(&buf);
 259
 260     if (!model_proto.ParseFromIstream(&input))
 261         CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");
 262
 263     populateNet();
 264 }
 265
 266 inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
 267 {
 268     if (layerParams.has(oldKey)) {
 269         layerParams.set(newKey, layerParams.get(oldKey));
 270         layerParams.erase(oldKey);
 271     }
 272 }
 273
 274 static
 275 void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
 276 {
 277     CV_Assert(valueInfoProto.has_name());
 278     CV_Assert(valueInfoProto.has_type());
 279     const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
 280     CV_Assert(typeProto.has_tensor_type());
 281     const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
 282     CV_Assert(tensor.has_shape());
 283     const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
 284
 285     int dim_size = tensorShape.dim_size();
 286     CV_CheckGE(dim_size, 0, "");
 287     MatShape shape(dim_size);
 288     for (int j = 0; j < dim_size; ++j)
 289     {
 290         const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
 291         if (dimension.has_dim_param())
 292         {
 293             CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
 294         }
 295         // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
 296         if (dimension.has_denotation())
 297         {
 298             CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
 299         }
 300         shape[j] = dimension.dim_value();
 301     }
 302     CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
 303 }
 304
 305 static
 306 void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
 307 {
 308     if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
 309         return;
 310     int dim_size = tensorProto.dims_size();
 311     CV_CheckGE(dim_size, 0, "");
 312     MatShape shape(dim_size);
 313     for (int j = 0; j < dim_size; ++j)
 314     {
 315         int sz = static_cast<int>(tensorProto.dims(j));
 316         shape[j] = sz;
 317     }
 318     CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
 319 }
 320
 321 void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
 322 {
 323     if (!tensor_proto.raw_data().empty()) {
 324         delete tensor_proto.release_raw_data();
 325     }
 326 }
 327
 328 void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
 329               std::vector<Mat>& outputs)
 330 {
 331     Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
 332     CV_Assert((bool)layer);
 333
 334     std::vector<MatShape> inpShapes(inputs.size());
 335     int ddepth = params.get<int>("depth", CV_32F);
 336     for (size_t i = 0; i < inputs.size(); ++i)
 337     {
 338         inpShapes[i] = shape(inputs[i]);
 339         if (i > 0 && ddepth != inputs[i].depth())
 340             CV_Error(Error::StsNotImplemented, "Mixed input data types.");
 341         ddepth = inputs[i].depth();
 342     }
 343
 344     std::vector<MatShape> outShapes, internalShapes;
 345     layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);
 346
 347     std::vector<Mat> internals(internalShapes.size());
 348     outputs.resize(outShapes.size());
 349     for (size_t i = 0; i < outShapes.size(); ++i)
 350         outputs[i].create(outShapes[i], ddepth);
 351     for (size_t i = 0; i < internalShapes.size(); ++i)
 352         internals[i].create(internalShapes[i], ddepth);
 353
 354     layer->finalize(inputs, outputs);
 355     layer->forward(inputs, outputs, internals);
 356 }
 357
 358 std::map<std::string, Mat> ONNXImporter::getGraphTensors(
 359                                         const opencv_onnx::GraphProto& graph_proto)
 360 {
 361     std::map<std::string, Mat> layers_weights;
 362
 363     for (int i = 0; i < graph_proto.initializer_size(); i++)
 364     {
 365         const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
 366         dumpTensorProto(i, tensor_proto, "initializer");
 367         Mat mat = getMatFromTensor(tensor_proto);
 368         releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto));  // drop already loaded data
 369
 370         if (DNN_DIAGNOSTICS_RUN && mat.empty())
 371             continue;
 372
 373         layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
 374     }
 375     return layers_weights;
 376 }
 377
 378 static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
 379     std::vector<int32_t> dst(src.size());
 380     convertInt64ToInt32(src, dst, src.size());
 381     return DictValue::arrayInt(&dst[0], src.size());
 382 }
 383
 384 static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) {
 385     return DictValue::arrayString(src.begin(), static_cast<int>(src.size()));
 386 }
 387
 388 LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
 389 {
 390     LayerParams lp;
 391     for(int i = 0; i < node_proto.attribute_size(); i++)
 392     {
 393         opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
 394         std::string attribute_name = attribute_proto.name();
 395
 396         try
 397         {
 398             if(attribute_name == "kernel_shape")
 399             {
 400                 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
 401                 lp.set("kernel_size", parse(attribute_proto.ints()));
 402             }
 403             else if(attribute_name == "strides")
 404             {
 405                 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
 406                 lp.set("stride", parse(attribute_proto.ints()));
 407             }
 408             else if(attribute_name == "pads")
 409             {
 410                 if (node_proto.op_type() == "Pad")
 411                 {
 412                     // Padding layer.
 413                     // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
 414                     // We need to shuffle it to begin0, end0, begin1, end1, ...
 415                     CV_Assert(attribute_proto.ints_size() % 2 == 0);
 416                     const int dims = attribute_proto.ints_size() / 2;
 417                     std::vector<int32_t> paddings;
 418                     paddings.reserve(attribute_proto.ints_size());
 419                     for (int i = 0; i < dims; ++i)
 420                     {
 421                         paddings.push_back(attribute_proto.ints(i));
 422                         paddings.push_back(attribute_proto.ints(dims + i));
 423                     }
 424                     lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
 425                 }
 426                 else
 427                 {
 428                     // Convolution or pooling.
 429                     CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
 430                     lp.set("pad", parse(attribute_proto.ints()));
 431                 }
 432             }
 433             else if(attribute_name == "auto_pad")
 434             {
 435                 if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
 436                     lp.set("pad_mode",  "SAME");
 437                 }
 438                 else if (attribute_proto.s() == "VALID") {
 439                     lp.set("pad_mode", "VALID");
 440                 }
 441             }
 442             else if(attribute_name == "dilations")
 443             {
 444                 CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
 445                 lp.set("dilation", parse(attribute_proto.ints()));
 446             }
 447             else if(attribute_name == "activations" && node_proto.op_type() == "LSTM")
 448             {
 449                 lp.set(attribute_name, parseStr(attribute_proto.strings()));
 450             }
 451             else if (attribute_proto.has_i())
 452             {
 453                 ::google::protobuf::int64 src = attribute_proto.i();
 454                 if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
 455                     CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
 456                 else
 457                     lp.set(attribute_name, saturate_cast<int32_t>(src));
 458             }
 459             else if (attribute_proto.has_f())
 460             {
 461                 lp.set(attribute_name, attribute_proto.f());
 462             }
 463             else if (attribute_proto.has_s())
 464             {
 465                 lp.set(attribute_name, attribute_proto.s());
 466             }
 467             else if (attribute_proto.floats_size() > 0)
 468             {
 469                 lp.set(attribute_name, DictValue::arrayReal(
 470                     attribute_proto.floats().data(), attribute_proto.floats_size()));
 471             }
 472             else if (attribute_proto.ints_size() > 0)
 473             {
 474                 lp.set(attribute_name, parse(attribute_proto.ints()));
 475             }
 476             else if (attribute_proto.has_t())
 477             {
 478                 opencv_onnx::TensorProto tensor = attribute_proto.t();
 479                 Mat blob = getMatFromTensor(tensor);
 480                 lp.blobs.push_back(blob);
 481             }
 482             else if (attribute_proto.has_g())
 483             {
 484                 CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
 485             }
 486             else if (attribute_proto.graphs_size() > 0)
 487             {
 488                 CV_Error(Error::StsNotImplemented,
 489                         cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
 490                                 attribute_name.c_str(), attribute_proto.graphs_size())
 491                 );
 492             }
 493             else if (attribute_proto.strings_size() > 0)
 494             {
 495                 std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
 496                         attribute_name.c_str(), attribute_proto.strings_size());
 497                 CV_LOG_ERROR(NULL, msg);
 498                 for (int i = 0; i < attribute_proto.strings_size(); i++)
 499                 {
 500                     CV_LOG_ERROR(NULL, "    Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
 501                 }
 502                 CV_Error(Error::StsNotImplemented, msg);
 503             }
 504             else if (attribute_proto.tensors_size() > 0)
 505             {
 506                 CV_Error(Error::StsNotImplemented,
 507                         cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
 508                                 attribute_name.c_str(), attribute_proto.tensors_size())
 509                 );
 510             }
 511             else
 512             {
 513                 CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
 514             }
 515         }
 516         catch (const cv::Exception& e)
 517         {
 518             CV_UNUSED(e);
 519             if (DNN_DIAGNOSTICS_RUN)
 520             {
 521                 CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << attribute_name.c_str()
 522                 );
 523                 continue;
 524             }
 525             throw;
 526         }
 527     }
 528     return lp;
 529 }
 530
 531 Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index)
 532 {
 533     CV_Assert(index < node_proto.input_size());
 534     const std::string& input_name = node_proto.input(index);
 535     return getBlob(input_name);
 536 }
 537
 538 Mat ONNXImporter::getBlob(const std::string& input_name)
 539 {
 540     std::map<std::string, Mat>::const_iterator constBlob = constBlobs.find(input_name);
 541     if (constBlob == constBlobs.end())
 542     {
 543         CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs");
 544     }
 545     return constBlob->second;
 546 }
 547
 548 void ONNXImporter::addLayer(LayerParams& layerParams,
 549                             const opencv_onnx::NodeProto& node_proto)
 550 {
 551     int depth = layerParams.get<int>("depth", CV_32F);
 552     int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
 553     for (int i = 0; i < node_proto.output_size(); ++i)
 554     {
 555         layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
 556     }
 557
 558     std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
 559     int inpNum = 0;
 560     for (int j = 0; j < node_proto.input_size(); j++)
 561     {
 562         const std::string& input_name = node_proto.input(j);
 563         IterLayerId_t layerId = layer_id.find(input_name);
 564         if (layerId != layer_id.end()) {
 565             dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
 566             ++inpNum;
 567             // Collect input shapes.
 568             IterShape_t shapeIt = outShapes.find(input_name);
 569             CV_Assert(shapeIt != outShapes.end());
 570             layerInpShapes.push_back(shapeIt->second);
 571         }
 572     }
 573     // Compute shape of output blob for this layer.
 574     Ptr<Layer> layer = dstNet.getLayer(id);  // FIXIT: avoid instantiation of layers during the import stage
 575     layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
 576     for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
 577     {
 578         outShapes[node_proto.output(i)] = layerOutShapes[i];
 579     }
 580 }
 581
 582 /** @brief Make N copies of input layer and set them as input to node_proto.
 583  * @param prefix prefix of new layers' names
 584  * @param node_proto node which will contain all copies as inputs
 585  * @param input name of the node to copy
 586  * @param n number of copies
 587  */
 588 void ONNXImporter::expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
 589                              const std::string& input, size_t n)
 590 {
 591     std::vector<std::string> input_names;
 592     input_names.reserve(n);
 593     for (size_t j = 0; j < n; j++)
 594     {
 595         LayerParams copyLP;
 596         copyLP.name = format("%s/copy_%zu", prefix.c_str(), j);
 597         copyLP.type = "Identity";
 598         CV_Assert((layer_id.find(copyLP.name) == layer_id.end()) &&
 599             "Couldn't copy the node: generated name already exists in the graph.");
 600         input_names.push_back(copyLP.name);
 601
 602         node_proto.set_input(0, input);
 603         node_proto.set_output(0, copyLP.name);
 604         addLayer(copyLP, node_proto);
 605     }
 606     node_proto.clear_input();
 607     for (size_t i = 0; i < input_names.size(); i++)
 608     {
 609         node_proto.add_input(input_names[i]);
 610     }
 611 }
 612
 613 /** @brief Multiply one of node_proto inputs by -1
 614  * @param layerParams parameters of the node
 615  * @param node_proto node which input will be replaced
 616  * @param input_id id of input to be multiplied by -1
 617  */
 618 void ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id)
 619 {
 620     LayerParams powerParams;
 621     powerParams.name = layerParams.name + "/neg";
 622     powerParams.type = "Power";
 623     powerParams.set("scale", -1.f);
 624
 625     //Create Power layer
 626     int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
 627     //Connect to input
 628     IterLayerId_t layerId = layer_id.find(node_proto.input(input_id));
 629     CV_Assert(layerId != layer_id.end());
 630     dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
 631     //Add shape
 632     layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
 633     outShapes[powerParams.name] = outShapes[node_proto.input(input_id)];
 634
 635     //Replace input to Power
 636     node_proto.set_input(input_id, powerParams.name);
 637 }
 638
 639 void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
 640 {
 641     CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob));
 642     constBlobs.insert(std::make_pair(name, blob));
 643     outShapes.insert(std::make_pair(name, shape(blob)));
 644 }
 645
 646 void ONNXImporter::parseOperatorSet()
 647 {
 648     int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
 649     if (ir_version < 3)
 650         return;
 651
 652     int opset_size = model_proto.opset_import_size();
 653     if (opset_size <= 0)
 654     {
 655         CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information")
 656         return;
 657     }
 658
 659     for (int i = 0; i < opset_size; ++i)
 660     {
 661         const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
 662         const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
 663         int version = opset_entry.has_version() ? opset_entry.version() : -1;
 664         if (domain.empty() || domain == str_domain_ai_onnx)
 665         {
 666             // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
 667             onnx_opset = std::max(onnx_opset, version);
 668             onnx_opset_map[str_domain_ai_onnx] = onnx_opset;
 669         }
 670         else
 671         {
 672             CV_LOG_DEBUG(NULL, "DNN/ONNX: using non-standard ONNX opset[" << i << "]: domain='" << domain << "' version=" << version);
 673             onnx_opset_map[domain] = onnx_opset;
 674         }
 675     }
 676
 677     CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
 678
 679     buildDispatchMap_ONNX_AI(onnx_opset);
 680     for (const auto& pair : onnx_opset_map)
 681     {
 682         if (pair.first == str_domain_ai_onnx)
 683         {
 684             continue;  // done above
 685         }
 686         else if (pair.first == "com.microsoft")
 687         {
 688             buildDispatchMap_COM_MICROSOFT(pair.second);
 689         }
 690         else
 691         {
 692             CV_LOG_INFO(NULL, "DNN/ONNX: unknown domain='" << pair.first << "' version=" << pair.second << ". No dispatch map, you may need to register 'custom' layers.");
 693         }
 694     }
 695 }
 696
 697 void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
 698                                        const opencv_onnx::NodeProto& node_proto)
 699 {
 700     // Quantized nodes have output names ending with 'quantized'
 701     std::string outName = node_proto.output(0);
 702     int len = outName.length();
 703     if (len <= 9)
 704         return;
 705
 706     if (outName.substr(len - 9) == "quantized")
 707     {
 708         outName = outName.substr(0, len - 9);
 709         Mat scale, zeropoint;
 710
 711         if (constBlobs.find(outName + "scale") != constBlobs.end() &&
 712             constBlobs.find(outName + "zero_point") != constBlobs.end())
 713         {
 714             scale = getBlob(outName + "scale");
 715             zeropoint = getBlob(outName + "zero_point");
 716         }
 717         else
 718         {
 719             std::string inpName = node_proto.input(0);
 720             inpName = inpName.substr(0, inpName.length() - 9);
 721             scale = getBlob(inpName + "scale");
 722             zeropoint = getBlob(inpName + "zero_point");
 723
 724             for (int i = 0; i < node_proto.output_size(); i++)
 725             {
 726                 std::string out = node_proto.output(i);
 727                 out = out.substr(0, out.length() - 9);
 728                 addConstant(out + "scale", scale);
 729                 addConstant(out + "zero_point", zeropoint);
 730             }
 731         }
 732
 733         if (scale.total() != 1 || zeropoint.total() != 1)
 734             CV_Error(Error::StsNotImplemented, "Per-channel scales/zeropoints are not supported");
 735
 736         layerParams.set("depth", CV_8S);
 737         layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
 738         layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
 739     }
 740 }
 741
 742 void ONNXImporter::populateNet()
 743 {
 744     CV_Assert(model_proto.has_graph());
 745     graph_proto = model_proto.graph();
 746
 747     std::string framework_version;
 748     if (model_proto.has_producer_name())
 749         framework_name = model_proto.producer_name();
 750     if (model_proto.has_producer_version())
 751         framework_version = model_proto.producer_version();
 752
 753     CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
 754             << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String())
 755             << " model produced by '" << framework_name << "'"
 756             << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
 757             << ". Number of nodes = " << graph_proto.node_size()
 758             << ", initializers = " << graph_proto.initializer_size()
 759             << ", inputs = " << graph_proto.input_size()
 760             << ", outputs = " << graph_proto.output_size()
 761             );
 762
 763     parseOperatorSet();
 764
 765     simplifySubgraphs(graph_proto);
 766
 767     const int layersSize = graph_proto.node_size();
 768     CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");
 769
 770     constBlobs = getGraphTensors(graph_proto);  // scan GraphProto.initializer
 771     std::vector<String> netInputs;  // map with network inputs (without const blobs)
 772     // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes.
 773     for (int i = 0; i < graph_proto.input_size(); ++i)
 774     {
 775         const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
 776         CV_Assert(valueInfoProto.has_name());
 777         const std::string& name = valueInfoProto.name();
 778         CV_Assert(valueInfoProto.has_type());
 779         const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
 780         CV_Assert(typeProto.has_tensor_type());
 781         const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
 782         CV_Assert(tensor.has_shape());
 783         const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
 784
 785         int dim_size = tensorShape.dim_size();
 786         CV_CheckGE(dim_size, 0, "");  // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
 787         MatShape inpShape(dim_size);
 788         for (int j = 0; j < dim_size; ++j)
 789         {
 790             const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
 791             if (dimension.has_dim_param())
 792             {
 793                 CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
 794             }
 795             // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
 796             if (dimension.has_denotation())
 797             {
 798                 CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
 799             }
 800             inpShape[j] = dimension.dim_value();
 801             // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
 802             if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
 803             {
 804                 hasDynamicShapes = true;
 805             }
 806         }
 807         bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
 808         CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
 809         CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
 810         if (dim_size > 0 && !hasDynamicShapes)  // FIXIT result is not reliable for models with multiple inputs
 811         {
 812             inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
 813         }
 814         outShapes[valueInfoProto.name()] = inpShape;
 815         // fill map: push layer name, layer id and output id
 816         if (!isInitialized)
 817         {
 818             netInputs.push_back(name);
 819             layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
 820         }
 821     }
 822
 823     dstNet.setInputsNames(netInputs);
 824
 825     // dump outputs
 826     for (int i = 0; i < graph_proto.output_size(); ++i)
 827     {
 828         dumpValueInfoProto(i, graph_proto.output(i), "output");
 829     }
 830
 831     if (DNN_DIAGNOSTICS_RUN) {
 832         CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
 833         layerHandler->fillRegistry(graph_proto);
 834     }
 835
 836     for(int li = 0; li < layersSize; li++)
 837     {
 838         const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
 839         handleNode(node_proto);
 840     }
 841
 842     CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!"));
 843 }
 844
 845 std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto)
 846 {
 847     if (!node_proto.has_domain())
 848         return str_domain_ai_onnx;
 849     const std::string& domain = node_proto.domain();
 850     if (domain.empty())
 851         return str_domain_ai_onnx;
 852     return domain;
 853 }
 854
 855 const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto)
 856 {
 857     static DispatchMap empty_map;
 858     const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
 859     auto it = domain_dispatch_map.find(layer_type_domain);
 860     if (it == domain_dispatch_map.end())
 861     {
 862         return empty_map;
 863     }
 864
 865     return it->second;
 866 }
 867
 868 void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
 869 {
 870     CV_Assert(node_proto.output_size() >= 1);
 871     const std::string& name = node_proto.output(0);
 872     const std::string& layer_type = node_proto.op_type();
 873     const std::string& layer_type_domain = getLayerTypeDomain(node_proto);
 874     const auto& dispatch = getDispatchMap(node_proto);
 875
 876     CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and "
 877                                                          << node_proto.output_size() << " outputs: "
 878                                                          << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
 879                                                          << cv::format(" from %sdomain='", onnx_opset_map.count(layer_type_domain) == 1 ? "" : "undeclared ")
 880                                                          << layer_type_domain << "'"
 881     );
 882
 883     if (dispatch.empty())
 884     {
 885         CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'");
 886     }
 887
 888     LayerParams layerParams;
 889     try
 890     {
 891         // FIXIT not all cases can be repacked into "LayerParams". Importer should handle such cases directly for each "layer_type"
 892         layerParams = getLayerParams(node_proto);
 893
 894         layerParams.name = name;
 895         layerParams.type = layer_type;
 896         layerParams.set("has_dynamic_shapes", hasDynamicShapes);
 897
 898         handleQuantizedNode(layerParams, node_proto);
 899
 900         DispatchMap::const_iterator iter = dispatch.find(layer_type);
 901         if (iter != dispatch.end())
 902         {
 903             CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto);
 904         }
 905         else
 906         {
 907             parseCustomLayer(layerParams, node_proto);
 908         }
 909     }
 910     catch (const cv::Exception& e)
 911     {
 912         if (DNN_DIAGNOSTICS_RUN)
 913         {
 914             CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
 915                     << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
 916                     << " from domain='" << layer_type_domain << "'"
 917                     << "\n" << e.msg
 918             );
 919             cv::AutoLock lock(getLayerFactoryMutex());
 920             auto registeredLayers = getLayerFactoryImpl();
 921             if (registeredLayers.find(layerParams.type) != registeredLayers.end())
 922             {
 923                 try
 924                 {
 925                     Ptr<Layer> layer = LayerFactory::createLayerInstance(layerParams.type, layerParams);
 926                 }
 927                 catch (const std::exception& e)
 928                 {
 929                     CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". Error: " << e.what()
 930                     );
 931                 }
 932             }
 933         }
 934         else
 935         {
 936             CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
 937                     << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
 938                     << " from domain='" << layer_type_domain << "'"
 939             );
 940         }
 941         for (int i = 0; i < node_proto.input_size(); i++)
 942         {
 943             CV_LOG_INFO(NULL, "    Input[" << i << "] = '" << node_proto.input(i) << "'");
 944         }
 945         for (int i = 0; i < node_proto.output_size(); i++)
 946         {
 947             CV_LOG_INFO(NULL, "    Output[" << i << "] = '" << node_proto.output(i) << "'");
 948         }
 949         if (DNN_DIAGNOSTICS_RUN)
 950         {
 951             for (int i = 0; i < node_proto.output_size(); ++i)
 952             {
 953                 layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i)));
 954                 outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)];
 955             }
 956         }
 957         else
 958             CV_Error(Error::StsError, cv::format("Node [%s@%s]:(%s) parse error: %s", layer_type.c_str(), layer_type_domain.c_str(), name.c_str(), e.what()));
 959     }
 960 }
 961
 962 void ONNXImporter::parseArg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 963 {
 964     const std::string& layer_type = node_proto.op_type();
 965     layerParams.type = "Arg";
 966     layerParams.set("op", layer_type == "ArgMax" ? "max" : "min");
 967     addLayer(layerParams, node_proto);
 968 }
 969
 970 void setCeilMode(LayerParams& layerParams)
 971 {
 972     // auto_pad attribute is deprecated and uses ceil
 973     if (layerParams.has("pad_mode"))
 974     {
 975         layerParams.set("ceil_mode", true);
 976     }
 977     else if (!layerParams.has("ceil_mode"))
 978     {
 979         layerParams.set("ceil_mode", false);
 980     }
 981 }
 982
 983 void ONNXImporter::parseMaxUnpool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 984 {
 985     layerParams.type = "MaxUnpool";
 986
 987     DictValue kernel_shape = layerParams.get("kernel_size");
 988     CV_Assert(kernel_shape.size() == 2);
 989     layerParams.set("pool_k_w", kernel_shape.get<int>(0));
 990     layerParams.set("pool_k_h", kernel_shape.get<int>(1));
 991
 992     int pool_pad_w = 0, pool_pad_h = 0;
 993     if (layerParams.has("pad"))
 994     {
 995         DictValue pads = layerParams.get("pad");
 996         CV_CheckEQ(pads.size(), 2, "");
 997         pool_pad_w = pads.get<int>(0);
 998         pool_pad_h = pads.get<int>(1);
 999     }
1000     layerParams.set("pool_pad_w", pool_pad_w);
1001     layerParams.set("pool_pad_h", pool_pad_h);
1002
1003
1004     int pool_stride_w = 1, pool_stride_h = 1;
1005     if (layerParams.has("stride"))
1006     {
1007         DictValue strides = layerParams.get("stride");
1008         CV_CheckEQ(strides.size(), 2, "");
1009         pool_stride_w = strides.get<int>(0);
1010         pool_stride_h = strides.get<int>(1);
1011     }
1012     layerParams.set("pool_stride_w", pool_stride_w);
1013     layerParams.set("pool_stride_h", pool_stride_h);
1014
1015     addLayer(layerParams, node_proto);
1016 }
1017
1018 void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1019 {
1020     int depth = layerParams.get<int>("depth", CV_32F);
1021     layerParams.type = (depth == CV_8S) ? "PoolingInt8" : "Pooling";
1022     layerParams.set("pool", "MAX");
1023     setCeilMode(layerParams);
1024     addLayer(layerParams, node_proto);
1025 }
1026
1027 void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1028 {
1029     layerParams.type = "Pooling";
1030     layerParams.set("pool", "AVE");
1031     setCeilMode(layerParams);
1032     layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
1033     addLayer(layerParams, node_proto);
1034 }
1035
1036 void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1037 {
1038     opencv_onnx::NodeProto node_proto = node_proto_;
1039     const std::string& layer_type = node_proto.op_type();
1040
1041     CV_Assert(node_proto.input_size() == 1);
1042     layerParams.type = "Pooling";
1043     String pool;
1044     if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax")
1045         pool = "MAX";
1046     else if (layer_type == "ReduceSum")
1047         pool = "SUM";
1048     else
1049         pool = "AVE";
1050     layerParams.set("pool", pool);
1051     layerParams.set("global_pooling", !layerParams.has("axes"));
1052     bool keepdims = layerParams.get<int>("keepdims", 1) == 1;
1053     if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
1054     {
1055         MatShape inpShape = outShapes[node_proto.input(0)];
1056         DictValue axes = layerParams.get("axes");
1057         MatShape targetShape;
1058         std::vector<bool> shouldDelete(inpShape.size(), false);
1059         for (int i = 0; i < axes.size(); i++) {
1060             int axis = normalize_axis(axes.get<int>(i), inpShape.size());
1061             shouldDelete[axis] = true;
1062         }
1063         for (int axis = 0; axis < inpShape.size(); ++axis){
1064             if (!shouldDelete[axis])
1065                 targetShape.push_back(inpShape[axis]);
1066             else if (keepdims)
1067                 targetShape.push_back(1);
1068         }
1069
1070         if (inpShape.size() == 3 && axes.size() <= 2)
1071         {
1072             int axis = normalize_axis(axes.get<int>(0), inpShape.size());
1073             CV_CheckNE(axis, 0, "");
1074
1075             LayerParams reshapeLp;
1076             reshapeLp.name = layerParams.name + "/reshape";
1077             reshapeLp.type = "Reshape";
1078             CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1079             reshapeLp.set("axis", 0);
1080             reshapeLp.set("num_axes", 1);
1081             int newShape[] = {1, -1};
1082             reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2));
1083
1084             opencv_onnx::NodeProto proto;
1085             proto.add_input(node_proto.input(0));
1086             proto.add_output(reshapeLp.name);
1087             addLayer(reshapeLp, proto);
1088
1089             LayerParams avgLp;
1090             avgLp.name = layerParams.name + "/avg";
1091             avgLp.type = "Pooling";
1092             CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
1093             avgLp.set("pool", pool);
1094             if (axes.size() == 2)
1095             {
1096                 CV_CheckEQ(normalize_axis(axes.get<int>(0), inpShape.size()), 1, "Unsupported mode");
1097                 CV_CheckEQ(normalize_axis(axes.get<int>(1), inpShape.size()), 2, "Unsupported mode");
1098                 avgLp.set("global_pooling", true);
1099             }
1100             else
1101             {
1102                 avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true);
1103                 avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1);
1104             }
1105
1106             node_proto.set_input(0, reshapeLp.name);
1107             node_proto.set_output(0, avgLp.name);
1108             addLayer(avgLp, node_proto);
1109         }
1110         else
1111         {
1112             if (inpShape.size() != 4 && inpShape.size() != 5)
1113                 CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation.");
1114
1115             CV_Assert(axes.size() <= inpShape.size() - 2);
1116             std::vector<int> kernel_size(inpShape.size() - 2, 1);
1117             if (axes.size() == 1 && (normalize_axis(axes.get<int>(0), inpShape.size()) <= 1))
1118             {
1119                 int axis = normalize_axis(axes.get<int>(0), inpShape.size());
1120                 MatShape newShape = inpShape;
1121                 newShape[axis + 1] = total(newShape, axis + 1);
1122                 newShape.resize(axis + 2);
1123                 newShape.insert(newShape.begin(), 2 - axis, 1);
1124
1125                 LayerParams reshapeLp;
1126                 reshapeLp.type = "Reshape";
1127                 reshapeLp.name = layerParams.name + "/reshape";
1128                 CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1129                 reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size()));
1130
1131                 node_proto.set_output(0, reshapeLp.name);
1132                 addLayer(reshapeLp, node_proto);
1133
1134                 kernel_size.resize(2);
1135                 kernel_size[0] = inpShape[axis];
1136                 node_proto.set_input(0, node_proto.output(0));
1137             }
1138             else
1139             {
1140                 for (int i = 0; i < axes.size(); i++) {
1141                     int axis = normalize_axis(axes.get<int>(i), inpShape.size());
1142                     CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
1143                     kernel_size[axis - 2] = inpShape[axis];
1144                 }
1145             }
1146
1147             LayerParams poolLp = layerParams;
1148             poolLp.name = layerParams.name + "/avg";
1149             CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
1150             poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));
1151
1152             node_proto.set_output(0, poolLp.name);
1153             addLayer(poolLp, node_proto);
1154         }
1155
1156         layerParams.type = "Reshape";
1157         layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));
1158
1159         node_proto.set_input(0, node_proto.output(0));
1160         node_proto.set_output(0, layerParams.name);
1161     }
1162     else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
1163     {
1164         IterShape_t shapeIt = outShapes.find(node_proto.input(0));
1165         CV_Assert(shapeIt != outShapes.end());
1166         const size_t dims = keepdims ? shapeIt->second.size() : 1;
1167
1168         LayerParams reshapeLp;
1169         reshapeLp.name = layerParams.name + "/reshape";
1170         reshapeLp.type = "Reshape";
1171         CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
1172         int newShape[] = {1, 1, 1, -1};
1173         reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4));
1174
1175         opencv_onnx::NodeProto proto;
1176         proto.add_input(node_proto.input(0));
1177         proto.add_output(reshapeLp.name);
1178         addLayer(reshapeLp, proto);
1179
1180         LayerParams poolLp = layerParams;
1181         poolLp.name = layerParams.name + "/pool";
1182         CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
1183
1184         node_proto.set_input(0, reshapeLp.name);
1185         node_proto.set_output(0, poolLp.name);
1186         addLayer(poolLp, node_proto);
1187
1188         layerParams.type = "Reshape";
1189         std::vector<int> targetShape(dims, 1);
1190         layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size()));
1191
1192         node_proto.set_input(0, node_proto.output(0));
1193         node_proto.set_output(0, layerParams.name);
1194     }
1195     addLayer(layerParams, node_proto);
1196 }
1197
1198 void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1199 {
1200     int axis = 0;
1201     std::vector<int> begin;
1202     std::vector<int> end;
1203     std::vector<int> steps;
1204     int inp_size = node_proto.input_size();
1205
1206     if (inp_size == 1)
1207     {
1208         if (layerParams.has("axes")) {
1209             DictValue axes = layerParams.get("axes");
1210             for (int i = 1; i < axes.size(); ++i) {
1211                 CV_Assert(axes.get<int>(i - 1) == axes.get<int>(i) - 1);
1212             }
1213             axis = axes.get<int>(0);
1214         }
1215
1216         DictValue starts = layerParams.get("starts");
1217         DictValue ends = layerParams.get("ends");
1218         CV_Assert(starts.size() == ends.size());
1219
1220         if (axis > 0) {
1221             CV_CheckLE(axis, 1024, "Slice layer can't have more than 1024 axes"); // arbitrary limit
1222             begin.resize(axis, 0);
1223             end.resize(axis, -1);
1224         }
1225         for (int i = 0; i < starts.size(); ++i)
1226         {
1227             begin.push_back(starts.get<int>(i));
1228             int finish = ends.get<int>(i);
1229             end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
1230         }
1231     } else { // inp_size > 1
1232         CV_Assert(inp_size >= 3);
1233         for (int i = 1; i < inp_size; i++) {
1234             CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
1235         }
1236         Mat start_blob = getBlob(node_proto, 1);
1237         Mat end_blob   = getBlob(node_proto, 2);
1238         CV_Assert(start_blob.total() == end_blob.total());
1239
1240         if (inp_size > 3) {
1241             Mat axes_blob = getBlob(node_proto, 3);
1242             const int* axes = (int*)axes_blob.data;
1243             for (int i = 1; i < axes_blob.total(); ++i) {
1244                 CV_Assert(axes[i - 1] == axes[i] - 1);
1245             }
1246             axis = axes[0];
1247         }
1248
1249         const int* starts = start_blob.ptr<int>();
1250         const int* ends   = end_blob.ptr<int>();
1251         if (axis > 0) {
1252             begin.resize(axis, 0);
1253             end.resize(axis, -1);
1254         }
1255         std::copy(starts, starts + start_blob.total(), std::back_inserter(begin));
1256         for (int i = 0; i < end_blob.total(); ++i)
1257         {
1258             int finish = ends[i];
1259             end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
1260         }
1261
1262         if (inp_size == 5) {
1263             CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end());
1264             Mat step_blob = getBlob(node_proto, 4);
1265             const int* steps_ptr = step_blob.ptr<int>();
1266
1267             if (axis > 0)
1268                 steps.resize(axis, 1);
1269
1270             std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps));
1271
1272             // Very strange application for Slice op with tensor reversing.
1273             // We just workaround it for 2d constants.
1274             if (constBlobs.find(node_proto.input(0)) != constBlobs.end() &&
1275                 axis == 0 &&
1276                 start_blob.at<int>(0) == -1 && step_blob.at<int>(0) == -1 &&
1277                 end_blob.at<int>(0) == std::numeric_limits<int32_t>::min())
1278             {
1279                 Mat inp = getBlob(node_proto, 0);
1280                 if (inp.dims == 2)
1281                 {
1282                     Mat flipped;
1283                     flip(inp, flipped, 0);
1284                     addConstant(layerParams.name, flipped);
1285                     return;
1286                 }
1287             }
1288         }
1289     }
1290     layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size()));
1291     layerParams.set("end", DictValue::arrayInt(&end[0], end.size()));
1292     layerParams.set("axis", axis);
1293
1294     if (!steps.empty())
1295         layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size()));
1296
1297     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
1298     {
1299         Mat inp = getBlob(node_proto, 0);
1300         std::vector<Mat> inputs, sliced;
1301         inputs.push_back(inp);
1302         runLayer(layerParams, inputs, sliced);
1303         CV_Assert(sliced.size() == 1);
1304         addConstant(layerParams.name, sliced[0]);
1305         return;
1306     }
1307     addLayer(layerParams, node_proto);
1308 }
1309
1310 void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1311 {
1312     if (layerParams.has("split"))
1313     {
1314         DictValue splits = layerParams.get("split");
1315         const int numSplits = splits.size();
1316         CV_Assert(numSplits > 1);
1317
1318         std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
1319         for (int i = 1; i < splits.size() - 1; ++i)
1320         {
1321             slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i);
1322         }
1323         layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
1324     }
1325     else
1326     {
1327         layerParams.set("num_split", node_proto.output_size());
1328     }
1329     int depth = layerParams.get<int>("depth", CV_32F);
1330     layerParams.type = (depth == CV_8S) ? "SliceInt8" : "Slice";
1331     layerParams.set("axis", layerParams.get<float>("axis", 0));
1332     addLayer(layerParams, node_proto);
1333 }
1334
1335 void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1336 {
1337     opencv_onnx::NodeProto node_proto = node_proto_;
1338     const std::string& layer_type = node_proto.op_type();
1339     bool isSub = layer_type == "Sub";
1340
1341     if (layer_type == "Sum" && node_proto.input_size() == 1)
1342     {
1343         layerParams.type = "Identity";
1344         addLayer(layerParams, node_proto);
1345         return;
1346     }
1347
1348     CV_Assert((node_proto.input_size() == 2) || (layer_type == "Sum" && node_proto.input_size() > 2));
1349
1350     if (layer_type == "Sum" && node_proto.input_size() > 2)
1351     {
1352         for (int i = 0; i < node_proto.input_size(); ++i)
1353         {
1354             if (layer_id.find(node_proto.input(i)) == layer_id.end())
1355             {
1356                 CV_Error(Error::StsNotImplemented, "Sum of constants is not implemented for inputs > 2");
1357             }
1358         }
1359     }
1360
1361     bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
1362     bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
1363     if (is_const_0 && is_const_1)
1364     {
1365         Mat blob_0 = getBlob(node_proto, 0);
1366         Mat blob_1 = getBlob(node_proto, 1);
1367         CV_Assert(blob_0.size == blob_1.size);
1368         Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
1369         addConstant(layerParams.name, output);
1370         return;
1371     }
1372     else if (is_const_0 || is_const_1)
1373     {
1374         int const_blob_id = is_const_0 ? 0 : 1;
1375         int input_id = 1 - const_blob_id;
1376         Mat blob = getBlob(node_proto, const_blob_id);
1377         int blob_total = blob.total();
1378
1379         const float inputScale = isSub && is_const_0 ? -1.f : 1.f;
1380         const float constScale = isSub && is_const_1 ? -1.f : 1.f;
1381
1382         if (blob_total == 1) {
1383             layerParams.type = "Power";
1384             layerParams.set("scale", inputScale);
1385             layerParams.set("shift", constScale * blob.ptr<float>()[0]);
1386         }
1387         else {
1388             MatShape inpShape = outShapes[node_proto.input(input_id)];
1389             if (shape(blob) == inpShape)
1390             {
1391                 LayerParams constParams;
1392                 constParams.name = layerParams.name + "/const";
1393                 constParams.type = "Const";
1394                 constParams.blobs.push_back(blob);
1395                 int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
1396                 layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
1397                 outShapes[constParams.name] = shape(blob);
1398
1399                 layerParams.type = "Eltwise";
1400                 float coeffs[] = {1., isSub ? -1.f : 1.f};
1401                 layerParams.set("coeff", DictValue::arrayReal<float*>(coeffs, 2));
1402                 node_proto.set_input(const_blob_id, constParams.name);
1403             }
1404             else
1405             {
1406                 if (inputScale < 0.f)
1407                 {
1408                     addNegation(layerParams, node_proto, input_id);
1409                 }
1410
1411                 layerParams.type = "Scale";
1412                 layerParams.set("bias_term", true);
1413                 int axis = 1;
1414                 for (int i = 0; i < graph_proto.initializer_size(); i++)
1415                 {
1416                     opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
1417                     if (tensor_proto.name() == node_proto.input(const_blob_id))
1418                     {
1419                         axis = inpShape.size() - tensor_proto.dims_size();
1420                         break;
1421                     }
1422                 }
1423                 layerParams.set("axis", axis);
1424                 blob = blob.reshape(1, 1);
1425                 layerParams.blobs.push_back(constScale * blob);
1426             }
1427         }
1428     }
1429     else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
1430     {
1431         layerParams.type = "Eltwise";
1432         if (isSub)
1433         {
1434             static float subCoeffs[] = {1.f, -1.f};
1435             layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
1436         }
1437     }
1438     else
1439     {
1440         if (isSub)
1441         {
1442             addNegation(layerParams, node_proto, 1);
1443         }
1444         layerParams.type = "Scale";
1445         layerParams.set("bias_term", true);
1446     }
1447     addLayer(layerParams, node_proto);
1448 }
1449
1450 void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1451 {
1452     if (layer_id.find(node_proto.input(1)) != layer_id.end())
1453         CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");
1454
1455     Mat blob = getBlob(node_proto, 1);
1456     if (blob.total() != 1)
1457         CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");
1458
1459     blob.convertTo(blob, CV_32F);
1460     layerParams.type = "Power";
1461     layerParams.set("power", blob.ptr<float>()[0]);
1462     addLayer(layerParams, node_proto);
1463 }
1464
1465 // "Min" "Max"
1466 void ONNXImporter::parseMinMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1467 {
1468     const std::string& layer_type = node_proto.op_type();
1469     layerParams.type = "Eltwise";
1470     layerParams.set("operation", layer_type == "Max" ? "max" : "min");
1471     addLayer(layerParams, node_proto);
1472 }
1473
1474 void ONNXImporter::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1475 {
1476     layerParams.type = "Power";
1477     layerParams.set("scale", -1);
1478     addLayer(layerParams, node_proto);
1479 }
1480
1481 void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1482 {
1483     CV_Assert(node_proto.input_size() == 0);
1484     CV_Assert(layerParams.blobs.size() == 1);
1485     addConstant(layerParams.name, layerParams.blobs[0]);
1486 }
1487
1488 void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1489 {
1490     opencv_onnx::NodeProto node_proto = node_proto_;
1491     LayerParams lstmParams = layerParams;
1492     lstmParams.name += "/lstm";
1493
1494     // https://pytorch.org/docs/stable/nn.html#lstm
1495     CV_Assert(node_proto.input_size() >= 7);
1496     Mat Wx = getBlob(node_proto, 1);
1497     Mat Wh = getBlob(node_proto, 2);
1498     Mat b = getBlob(node_proto, 3);
1499
1500     const int numHidden = lstmParams.get<int>("hidden_size");
1501     const int numDirs = Wx.size[0];  // Is 1 for forward only and 2 for bidirectional LSTM.
1502     const int numFeatures = Wx.size[2];
1503
1504     // Following checks are deduced from the IFGO->IGFO loop below
1505     // Wx is numDirs X numHidden*3 X numFeatures
1506     // Wh is numDirs X numHidden*3 X numHidden
1507     CV_CheckLE(numHidden * 3, Wx.size[1], "Wx should have beat  least 3x hidden_size in dimension 1");
1508     CV_CheckLE(numHidden * 3, Wh.size[1], "Wh should have be at least 3x hidden_size in dimension 1");
1509     CV_CheckLE(numHidden, Wh.size[2], "Wh should have be at least hidden_size in dimension 2");
1510
1511     Mat h0, c0;
1512     if (!node_proto.input(5).empty()) {
1513         h0 = getBlob(node_proto, 5);
1514         h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
1515     } else {
1516         // initial_h attribute can be empty in case of keras2onnx producer. fill it with zeros
1517         h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1);
1518     }
1519     if (!node_proto.input(6).empty()) {
1520         c0 = getBlob(node_proto, 6);
1521         c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
1522     } else {
1523         // initial_c attribute can be empty in case of keras2onnx producer. fill it with zeros
1524         c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1);
1525     }
1526
1527     b = b.reshape(1, b.size[0]);
1528     Mat bx = b.colRange(0, b.cols / 2);
1529     Mat bh = b.colRange(b.cols / 2, b.cols);
1530     b = bx + bh;
1531
1532     // b is numDirs X numHidden*3
1533     CV_CheckLE(numHidden * 3, b.cols, "Bias data should have at least 3x hidden_size columns");
1534
1535     // IFGO->IGFO
1536     for (int k = 0; k < numDirs; ++k)
1537     {
1538         float* WxData = Wx.ptr<float>(k);
1539         float* WhData = Wh.ptr<float>(k);
1540         float* biasData = b.ptr<float>(k);
1541         for (int j = 0; j < numHidden; ++j)
1542         {
1543             for (int i = 0; i < numFeatures; ++i)
1544             {
1545                 std::swap(WxData[(numHidden + j) * numFeatures + i],
1546                           WxData[(numHidden * 2 + j) * numFeatures + i]);
1547             }
1548             for (int i = 0; i < numHidden; ++i)
1549             {
1550                 std::swap(WhData[(numHidden + j) * numHidden + i],
1551                           WhData[(numHidden * 2 + j) * numHidden + i]);
1552             }
1553             std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
1554         }
1555     }
1556     Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
1557     Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
1558
1559
1560     lstmParams.blobs.resize(5);
1561     lstmParams.blobs[0] = Wh;
1562     lstmParams.blobs[1] = Wx;
1563     lstmParams.blobs[2] = b;
1564     lstmParams.blobs[3] = h0;
1565     lstmParams.blobs[4] = c0;
1566
1567     // read direction attribute
1568     lstmParams.set("reverse", lstmParams.get<String>("direction", "") == "reverse");
1569     lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
1570
1571     node_proto.set_output(0, lstmParams.name);  // set different name so output shapes will be registered on that name
1572     addLayer(lstmParams, node_proto);
1573
1574     MatShape lstmShape = outShapes[node_proto.output(0)];
1575
1576     // Add fake 1 as it is done in ONNX
1577     lstmShape.insert(lstmShape.begin() + 1, 1);
1578
1579     layerParams.type = "Reshape";
1580     layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
1581     node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
1582     node_proto.set_output(0, layerParams.name);  // keep origin LSTM's name
1583     addLayer(layerParams, node_proto);
1584 }
1585
1586 void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1587 {
1588     opencv_onnx::NodeProto node_proto = node_proto_;
1589     LayerParams gruParams = layerParams;
1590     gruParams.name += "/gru";
1591
1592     // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru#
1593     CV_Assert(node_proto.input_size() == 6);
1594     Mat Wx = getBlob(node_proto, 1);
1595     Mat Wh = getBlob(node_proto, 2);
1596     Mat b = getBlob(node_proto, 3);
1597     Mat h0 = getBlob(node_proto, 5);
1598
1599     Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
1600     Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
1601     h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
1602     b = b.reshape(1, b.size[0]);
1603
1604     gruParams.blobs.resize(4);
1605     gruParams.blobs[0] = Wh;
1606     gruParams.blobs[1] = Wx;
1607     gruParams.blobs[2] = b;
1608     gruParams.blobs[3] = h0;
1609     gruParams.set("bidirectional", gruParams.get<String>("direction", "") == "bidirectional");
1610
1611     node_proto.set_output(0, gruParams.name);  // set different name so output shapes will be registered on that name
1612     addLayer(gruParams, node_proto);
1613
1614     MatShape gruShape = outShapes[node_proto.output(0)];
1615
1616     // Add fake 1 as it is done in ONNX
1617     gruShape.insert(gruShape.begin() + 1, 1);
1618
1619     layerParams.type = "Reshape";
1620     layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size()));
1621     node_proto.set_input(0, gruParams.name);  // redirect input to GRU
1622     node_proto.set_output(0, layerParams.name);  // keep origin GRU's name
1623     addLayer(layerParams, node_proto);
1624 }
1625
1626 void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1627 {
1628     const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
1629     layerParams.erase("scale");
1630
1631     if (layerParams.has("bias"))
1632     {
1633         layerParams.type = "Scale";
1634         layerParams.blobs.push_back(
1635                 Mat(Size(1,  layerParams.get("bias").size()), CV_32FC1, scale));
1636
1637         layerParams.set("bias_term", true);
1638         Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
1639         for (int j = 0; j < bias.total(); j++) {
1640             bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
1641         }
1642         layerParams.blobs.push_back(bias);
1643         layerParams.erase("bias");
1644     }
1645     else {
1646         layerParams.set("scale", scale);
1647         layerParams.type = "Power";
1648     }
1649     addLayer(layerParams, node_proto);
1650 }
1651
1652 void ONNXImporter::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1653 {
1654     CV_CheckEQ(node_proto.input_size(), 1, "");
1655     layerParams.type = "ReLU6";
1656     layerParams.set("min_value", layerParams.get<float>("min", -FLT_MAX));
1657     layerParams.set("max_value", layerParams.get<float>("max", FLT_MAX));
1658     addLayer(layerParams, node_proto);
1659 }
1660
1661 void ONNXImporter::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1662 {
1663     layerParams.type = "ReLU";
1664     layerParams.set("negative_slope", layerParams.get<float>("alpha", 0.01));
1665     addLayer(layerParams, node_proto);
1666 }
1667
1668 void ONNXImporter::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1669 {
1670     layerParams.type = "ReLU";
1671     addLayer(layerParams, node_proto);
1672 }
1673
1674 void ONNXImporter::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1675 {
1676     layerParams.type = "ELU";
1677     addLayer(layerParams, node_proto);
1678 }
1679
1680 void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1681 {
1682     layerParams.type = "TanH";
1683     addLayer(layerParams, node_proto);
1684 }
1685
1686 void ONNXImporter::parseAbs(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1687 {
1688     layerParams.type = "AbsVal";
1689     addLayer(layerParams, node_proto);
1690 }
1691
1692 void ONNXImporter::parseCompare(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1693 {
1694     CV_Assert(node_proto.input_size() == 2);
1695     const std::string& layer_type = node_proto.op_type();
1696
1697     bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
1698     bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
1699
1700     if (is_const_0 || is_const_1)
1701     {
1702         Mat blob = getBlob(node_proto, static_cast<int>(is_const_1));
1703         blob = blob.reshape(1, 1);
1704         layerParams.blobs.push_back(blob);
1705     }
1706
1707     layerParams.type = "Compare";
1708
1709     if (layer_type == "Equal")
1710         layerParams.set("mode", "equal");
1711     else if (layer_type == "Greater")
1712         layerParams.set("mode", "greater");
1713     else
1714         layerParams.set("mode", "less");
1715     addLayer(layerParams, node_proto);
1716 }
1717
1718 void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1719 {
1720     layerParams.type = "PReLU";
1721     layerParams.blobs.push_back(getBlob(node_proto, 1));
1722     addLayer(layerParams, node_proto);
1723 }
1724
1725 void ONNXImporter::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1726 {
1727     replaceLayerParam(layerParams, "size", "local_size");
1728     addLayer(layerParams, node_proto);
1729 }
1730
1731 void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1732 {
1733     opencv_onnx::NodeProto node_proto = node_proto_;
1734     if (node_proto.input_size() != 3)
1735         CV_Error(Error::StsNotImplemented,
1736                  "Expected input, scale, bias");
1737
1738     layerParams.blobs.resize(4);
1739     layerParams.blobs[2] = getBlob(node_proto, 1);  // weightData
1740     layerParams.blobs[3] = getBlob(node_proto, 2);  // biasData
1741     layerParams.set("has_bias", true);
1742     layerParams.set("has_weight", true);
1743
1744     // Get number of channels in input
1745     int size = layerParams.blobs[2].total();
1746     layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean
1747     layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std
1748
1749     LayerParams mvnParams;
1750     mvnParams.name = layerParams.name + "/MVN";
1751     mvnParams.type = "MVN";
1752     mvnParams.set("eps", layerParams.get<float>("epsilon"));
1753     layerParams.erase("epsilon");
1754
1755     //Create MVN layer
1756     int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams);
1757     //Connect to input
1758     IterLayerId_t layerId = layer_id.find(node_proto.input(0));
1759     CV_Assert(layerId != layer_id.end());
1760     dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
1761     //Add shape
1762     layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0)));
1763     outShapes[mvnParams.name] = outShapes[node_proto.input(0)];
1764
1765     //Replace Batch Norm's input to MVN
1766     node_proto.set_input(0, mvnParams.name);
1767     layerParams.type = "BatchNorm";
1768     addLayer(layerParams, node_proto);
1769 }
1770
1771 void ONNXImporter::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1772 {
1773     if (node_proto.input_size() != 5)
1774         CV_Error(Error::StsNotImplemented,
1775                  "Expected input, scale, bias, mean and var");
1776
1777     layerParams.type = "BatchNorm";
1778     replaceLayerParam(layerParams, "epsilon", "eps");
1779     replaceLayerParam(layerParams, "spatial", "use_global_stats");
1780
1781     Mat meanData = getBlob(node_proto, 3);
1782     Mat stdData =  getBlob(node_proto, 4);
1783
1784     layerParams.blobs.push_back(meanData);
1785     layerParams.blobs.push_back(stdData);
1786
1787     if (!node_proto.input(1).empty()) {
1788         layerParams.set("has_weight", true);
1789         layerParams.blobs.push_back(getBlob(node_proto, 1));  // weightData
1790     } else {
1791         layerParams.set("has_weight", false);
1792     }
1793
1794     if (!node_proto.input(2).empty()) {
1795         layerParams.set("has_bias", true);
1796         layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData
1797     } else {
1798         layerParams.set("has_bias", false);
1799     }
1800     addLayer(layerParams, node_proto);
1801 }
1802
1803 void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1804 {
1805     CV_Assert(node_proto.input_size() >= 2);
1806     layerParams.type = "InnerProduct";
1807     Mat weights = getBlob(node_proto, 1);
1808     int ind_num_out = 0;
1809     if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
1810         transpose(weights, weights);
1811         ind_num_out = 1;
1812     }
1813     layerParams.blobs.push_back(weights);
1814
1815     if (node_proto.input_size() == 3) {
1816         Mat bias = getBlob(node_proto, 2);
1817         layerParams.blobs.push_back(bias);
1818     }
1819     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
1820     {
1821         Mat inputBuf = getBlob(node_proto, 0);
1822
1823         LayerParams constParams;
1824         constParams.name = node_proto.input(0);
1825         constParams.type = "Const";
1826         constParams.blobs.push_back(inputBuf);
1827
1828         opencv_onnx::NodeProto proto;
1829         proto.add_output(constParams.name);
1830         addLayer(constParams, proto);
1831     }
1832
1833     layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
1834     layerParams.set("bias_term", node_proto.input_size() == 3);
1835     addLayer(layerParams, node_proto);
1836 }
1837
1838 void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
1839 {
1840     CV_Assert(node_proto.input_size() == 2);
1841     layerParams.type = "InnerProduct";
1842     layerParams.set("bias_term", false);
1843     CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
1844     int firstInpDims = outShapes[node_proto.input(0)].size();
1845     int secondInpDims;
1846
1847     if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
1848     {
1849         Mat blob = getBlob(node_proto, 1);
1850         secondInpDims = blob.dims;
1851         layerParams.blobs.push_back(blob.t());
1852         layerParams.set("num_output", layerParams.blobs[0].size[0]);
1853     } else {
1854         secondInpDims = outShapes[node_proto.input(1)].size();
1855     }
1856     layerParams.set("axis", firstInpDims - secondInpDims + 1);
1857     addLayer(layerParams, node_proto);
1858 }
1859
1860 void findBroadAxis(const MatShape& broadShape, const MatShape& outShape, size_t& axis, int& broadAxis)
1861 {
1862     const size_t diff = outShape.size() - broadShape.size();
1863
1864     // find the first non-one element of the broadcasting shape
1865     axis = 0;
1866     for (; axis < broadShape.size() && broadShape[axis] == 1; ++axis) {}
1867
1868     // find the last non-one element of the broadcasting shape
1869     size_t endAxis = broadShape.size();
1870     for (; endAxis > axis && broadShape[endAxis - 1] == 1; --endAxis) {}
1871
1872     // find one between axis and endAxis - as it needs to be broadcasted,
1873     // dimensions from the left of axis and from the right of endAxis will be handled by Scale layer
1874     broadAxis = -1;
1875     for (size_t i = axis; i < endAxis; ++i)
1876     {
1877         size_t outAxis = i + diff;
1878         if (outShape[outAxis] == broadShape[i])
1879         {
1880             continue;
1881         }
1882
1883         // ensure we need to broadcast only 1 dimension in the middle
1884         CV_Assert(broadShape[i] == 1 && broadAxis == -1);
1885         broadAxis = static_cast<int>(outAxis);
1886     }
1887
1888     axis += diff;
1889 }
1890
1891 // "Mul" "Div"
1892 void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
1893 {
1894     opencv_onnx::NodeProto node_proto = node_proto_;
1895     const std::string& layer_type = node_proto.op_type();
1896     CV_Assert(node_proto.input_size() == 2);
1897
1898     bool isDiv = layer_type == "Div";
1899     int constId = -1;
1900     bool haveVariables = false;
1901     for (int i = 0; i < 2; ++i)
1902     {
1903         if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
1904             constId = i;
1905         else
1906             haveVariables = true;
1907     }
1908     if (constId != -1 && haveVariables)
1909     {
1910         Mat blob = getBlob(node_proto, constId);
1911         blob = blob.reshape(1, 1);
1912         if (blob.total() == 1) {
1913             float blob_value = blob.ptr<float>()[0];
1914             float coeff = blob_value;
1915             if (isDiv)
1916             {
1917                 coeff = 1.f / blob_value;
1918                 if (constId == 0)
1919                 {
1920                     // Power layer calculates (x*scale + shift)^power, so const/x -> (x * (1/const) + 0)^(-1)
1921                     layerParams.set("power", -1.f);
1922                 }
1923             }
1924             layerParams.set("scale", coeff);
1925             layerParams.type = "Power";
1926         }
1927         else {
1928             if (isDiv)
1929                 divide(1.0, blob, blob);
1930             layerParams.blobs.push_back(blob);
1931             layerParams.type = "Scale";
1932         }
1933     }
1934     else if (!haveVariables)
1935     {
1936         Mat inp0 = getBlob(node_proto, 0);
1937         Mat inp1 = getBlob(node_proto, 1);
1938
1939         if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1))
1940             CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str()));
1941
1942         if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims)
1943         {
1944             if (inp0.dims < inp1.dims)
1945             {
1946                 inp0 = inp0.reshape(1, inp1.dims, inp1.size);
1947                 inp0.dims = inp1.dims;
1948             }
1949             else
1950             {
1951                 inp1 = inp1.reshape(1, inp0.dims, inp0.size);
1952                 inp1.dims = inp0.dims;
1953             }
1954         }
1955
1956         Mat out;
1957         if (inp0.total() != inp1.total())
1958         {
1959             if (inp0.total() == 1)
1960             {
1961                 float inp0_value = inp0.ptr<float>()[0];
1962                 float coeff = isDiv ? 1.0 / inp0_value : inp0_value;
1963                 multiply(inp1, coeff, out);
1964             }
1965             else
1966             {
1967                 float inp1_value = inp1.ptr<float>()[0];
1968                 float coeff = isDiv ? 1.0 / inp1_value : inp1_value;
1969                 multiply(inp0, coeff, out);
1970             }
1971
1972         }
1973         else
1974         {
1975             out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
1976         }
1977
1978         if (inp0.dims == 1 && inp1.dims == 1)
1979             out.dims = 1;  // to workaround dims == 1
1980         addConstant(layerParams.name, out);
1981         return;
1982     }
1983     else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
1984     {
1985         layerParams.type = "Eltwise";
1986         layerParams.set("operation", isDiv ? "div" : "prod");
1987     }
1988     else
1989     {
1990         // Scale layer allocate output with the first input shape
1991         if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)]))
1992         {
1993             opencv_onnx::NodeProto proto;
1994             proto.add_input(node_proto.input(1));
1995             proto.add_input(node_proto.input(0));
1996             proto.add_output(layerParams.name);
1997             node_proto = proto;
1998         }
1999
2000         if (isDiv)
2001         {
2002             LayerParams powerParams;
2003             powerParams.name = layerParams.name + "/inv";
2004             powerParams.type = "Power";
2005             powerParams.set("power", -1);
2006
2007             //Create Power layer
2008             int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
2009             //Connect to input
2010             IterLayerId_t layerId = layer_id.find(node_proto.input(1));
2011             CV_Assert(layerId != layer_id.end());
2012             dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
2013             //Add shape
2014             layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
2015             outShapes[powerParams.name] = outShapes[node_proto.input(1)];
2016
2017             //Replace input to Power
2018             node_proto.set_input(1, powerParams.name);
2019         }
2020
2021         const MatShape& broadShape = outShapes[node_proto.input(1)];
2022         const MatShape& outShape = outShapes[node_proto.input(0)];
2023
2024         size_t axis = 0;
2025         int broadAxis = -1;
2026         findBroadAxis(broadShape, outShape, axis, broadAxis);
2027
2028         // if there is a one dimension in the middle that should be broadcasted, broadcast it
2029         if (broadAxis != -1)
2030         {
2031             opencv_onnx::NodeProto concat_node_proto = node_proto;
2032             const std::string& input1 = concat_node_proto.input(1);
2033
2034             expandMid(layerParams.name, concat_node_proto, input1, outShape[broadAxis]);
2035
2036             LayerParams concatLP;
2037             concatLP.name = layerParams.name + "/concat";
2038             concatLP.set("axis", broadAxis);
2039             concatLP.type = "Concat";
2040             concat_node_proto.set_output(0, concatLP.name);
2041
2042             addLayer(concatLP, concat_node_proto);
2043             node_proto.set_input(1, concatLP.name);
2044         }
2045
2046         CV_Assert(axis != outShape.size());
2047         layerParams.set("axis", static_cast<int>(axis));
2048         layerParams.type = "Scale";
2049     }
2050     addLayer(layerParams, node_proto);
2051 }
2052
2053 void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2054 {
2055     opencv_onnx::NodeProto node_proto = node_proto_;
2056     CV_Assert(node_proto.input_size() >= 2);
2057     layerParams.type = "Convolution";
2058     for (int j = 1; j < node_proto.input_size(); j++) {
2059         if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
2060         {
2061             layerParams.blobs.push_back(getBlob(node_proto, j));
2062         }
2063     }
2064     int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
2065     layerParams.set("num_output", outCn);
2066
2067     // Check for asymmetric padding in Conv2D
2068     if (layerParams.has("pad"))
2069     {
2070         bool asymmetricPadding = false;
2071         DictValue pads = layerParams.get("pad");
2072         const int dims = pads.size() / 2;
2073         for (int i = 0; i < dims; ++i)
2074         {
2075             if (pads.get<int>(i) != pads.get<int>(i + dims))
2076             {
2077                 asymmetricPadding = true;
2078                 break;
2079             }
2080         }
2081         if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r]
2082         {
2083             layerParams.erase("pad");
2084             // No paddings required for N, C axis
2085             std::vector<int> paddings(4, 0);
2086             // Add paddings for H, W axis
2087             for (int i = 0; i < dims; ++i)
2088             {
2089                 paddings.push_back(pads.get<int>(i));
2090                 paddings.push_back(pads.get<int>(dims + i));
2091             }
2092             LayerParams padLp;
2093             padLp.name = layerParams.name + "/pad";
2094             padLp.type = "Padding";
2095             padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
2096
2097             opencv_onnx::NodeProto proto;
2098             proto.add_input(node_proto.input(0));
2099             proto.add_output(padLp.name);
2100
2101             addLayer(padLp, proto);
2102             node_proto.set_input(0, padLp.name);
2103         }
2104     }
2105     addLayer(layerParams, node_proto);
2106 }
2107
2108 void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2109 {
2110     CV_Assert(node_proto.input_size() >= 2);
2111     layerParams.type = "Deconvolution";
2112     for (int j = 1; j < node_proto.input_size(); j++) {
2113         layerParams.blobs.push_back(getBlob(node_proto, j));
2114     }
2115     layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
2116     layerParams.set("bias_term", node_proto.input_size() == 3);
2117
2118     if (!layerParams.has("kernel_size"))
2119         CV_Error(Error::StsNotImplemented,
2120                  "Required attribute 'kernel_size' is not present.");
2121
2122     if (layerParams.has("output_shape"))
2123     {
2124         const DictValue& outShape = layerParams.get("output_shape");
2125         DictValue strides = layerParams.get("stride");
2126         DictValue kernel = layerParams.get("kernel_size");
2127
2128         String padMode;
2129         std::vector<int> adjust_pads;
2130         if (layerParams.has("pad_mode"))
2131         {
2132             padMode = toUpperCase(layerParams.get<String>("pad_mode"));
2133             if (padMode != "SAME" && padMode != "VALID")
2134                 CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
2135
2136             for (int i = 0; i < strides.size(); i++)
2137             {
2138                 int sz = outShape.get<int>(2 + i);
2139                 int stride = strides.get<int>(i);
2140                 adjust_pads.push_back(padMode == "SAME"? (sz - 1) % stride :
2141                                                          (sz - kernel.get<int>(i)) % stride);
2142             }
2143             layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
2144         }
2145     }
2146     else if (layerParams.has("output_padding"))
2147     {
2148         replaceLayerParam(layerParams, "output_padding", "adj");
2149     }
2150     addLayer(layerParams, node_proto);
2151 }
2152
2153 void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2154 {
2155     int depth = layerParams.get<int>("depth", CV_32F);
2156     layerParams.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
2157     replaceLayerParam(layerParams, "perm", "order");
2158     if (!layerParams.has("order")) {
2159         MatShape inpShape = outShapes[node_proto.input(0)];
2160         size_t dims = inpShape.size();
2161         std::vector<int> perm(dims);
2162         for (size_t d = 0; d < dims; ++d)
2163         {
2164             perm[d] = static_cast<int>(dims - 1 - d);
2165         }
2166         layerParams.set("order", DictValue::arrayInt(perm.data(), perm.size()));
2167     }
2168
2169     CV_Assert(node_proto.input_size() == 1);
2170     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2171     {
2172         std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
2173         runLayer(layerParams, inputs, transposed);
2174         CV_Assert(transposed.size() == 1);
2175         addConstant(layerParams.name, transposed[0]);
2176         return;
2177     }
2178     addLayer(layerParams, node_proto);
2179 }
2180
2181 void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2182 {
2183     CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
2184     DictValue axes_dict = layerParams.get("axes");
2185     MatShape inpShape = outShapes[node_proto.input(0)];
2186
2187     std::vector<bool> maskedAxes(inpShape.size(), false);
2188     for (int i = 0; i < axes_dict.size(); ++i)
2189     {
2190         int axis = axes_dict.getIntValue(i);
2191         CV_CheckLE(axis, static_cast<int>(inpShape.size()), "Squeeze axis");
2192         maskedAxes[axis] = inpShape[axis] == 1;
2193     }
2194     MatShape outShape;
2195     for (int i = 0; i < inpShape.size(); ++i)
2196     {
2197         if (!maskedAxes[i])
2198             outShape.push_back(inpShape[i]);
2199     }
2200     if (outShape.size() != inpShape.size())
2201     {
2202         layerParams.type = "Reshape";
2203         layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
2204         if (hasDynamicShapes)
2205         {
2206             std::vector<int> dynamicAxes;
2207             std::vector<int> inputIndices;
2208             for (int index = 0; index < inpShape.size(); ++index)
2209             {
2210                 if (!maskedAxes[index])
2211                     inputIndices.push_back(index);
2212             }
2213             for (int index = 0; index < outShape.size(); ++index)
2214                 dynamicAxes.push_back(index);
2215             layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2216             layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2217         }
2218     }
2219     else
2220         layerParams.type = "Identity";
2221
2222     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2223     {
2224         Mat inp = getBlob(node_proto, 0);
2225         Mat out = inp.reshape(1, outShape);
2226         out.dims = outShape.size();  // to workaround dims == 1
2227         addConstant(layerParams.name, out);
2228         return;
2229     }
2230     int depth = layerParams.get<int>("depth", CV_32F);
2231     layerParams.type += (depth == CV_8S) ? "Int8" : "";
2232     addLayer(layerParams, node_proto);
2233 }
2234
2235 void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2236 {
2237     opencv_onnx::NodeProto node_proto = node_proto_;
2238     CV_CheckEQ(node_proto.input_size(), 1, "");
2239     int axis_ = layerParams.get<int>("axis", 1);
2240     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2241     {
2242         Mat input = getBlob(node_proto, 0);
2243         int axis = normalize_axis(axis_, input.dims);
2244
2245         int out_size[2] = {1, 1};
2246         for (int i = 0; i < axis; ++i)
2247         {
2248             out_size[0] *= input.size[i];
2249         }
2250         for (int i = axis; i < input.dims; ++i)
2251         {
2252             out_size[1] *= input.size[i];
2253         }
2254
2255         Mat output = input.reshape(1, 2, out_size);
2256         addConstant(layerParams.name, output);
2257         return;
2258     }
2259     IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2260     CV_Assert(shapeIt != outShapes.end());
2261     MatShape inpShape = shapeIt->second;
2262     int axis = normalize_axis(axis_, inpShape.size());
2263
2264     if (axis == 0 || axis == inpShape.size())
2265     {
2266         LayerParams reshapeLp;
2267         reshapeLp.name = layerParams.name + "/reshape";
2268         reshapeLp.type = "Reshape";
2269         CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
2270
2271         inpShape.insert(axis == 0 ? inpShape.begin() : inpShape.end(), 1);
2272         reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2273
2274         opencv_onnx::NodeProto proto;
2275         proto.add_input(node_proto.input(0));
2276         proto.add_output(reshapeLp.name);
2277         addLayer(reshapeLp, proto);
2278         node_proto.set_input(0, reshapeLp.name);
2279         axis += 1;
2280     }
2281
2282     LayerParams first_pass;
2283     first_pass.name = layerParams.name + "/flatten";
2284     CV_Assert(layer_id.find(first_pass.name) == layer_id.end());
2285     first_pass.type = "Flatten";
2286     first_pass.set("axis", 0);
2287     first_pass.set("end_axis", axis - 1);
2288
2289     opencv_onnx::NodeProto proto;
2290     proto.add_input(node_proto.input(0));
2291     proto.add_output(first_pass.name);
2292     addLayer(first_pass, proto);
2293
2294     layerParams.set("axis", 1);
2295     node_proto.set_input(0, first_pass.name);
2296     addLayer(layerParams, node_proto);
2297 }
2298
2299 void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2300 {
2301     CV_Assert(node_proto.input_size() == 1 || node_proto.input_size() == 2);
2302     DictValue axes;
2303     if (node_proto.input_size() == 2)
2304     {
2305         Mat blob = getBlob(node_proto, 1);
2306         axes = DictValue::arrayInt(blob.ptr<int>(), blob.total());
2307     }
2308     else
2309         axes = layerParams.get("axes");
2310
2311     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2312     {
2313         // Constant input.
2314         Mat input = getBlob(node_proto, 0);
2315
2316         std::vector<int> dims;
2317         for (int j = 0; j < input.dims; j++) {
2318             dims.push_back(input.size[j]);
2319         }
2320         CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
2321         for (int j = 0; j < axes.size(); j++) {
2322             const int idx = axes.getIntValue(j);
2323             CV_Assert(idx <= dims.size());
2324             dims.insert(dims.begin() + idx, 1);
2325         }
2326
2327         Mat out = input.reshape(0, dims);
2328         addConstant(layerParams.name, out);
2329         return;
2330     }
2331
2332     // Variable input.
2333     if (axes.size() != 1)
2334         CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");
2335
2336     int depth = layerParams.get<int>("depth", CV_32F);
2337
2338     MatShape inpShape = outShapes[node_proto.input(0)];
2339     int axis = axes.getIntValue(0);
2340     CV_Assert(0 <= axis && axis <= inpShape.size());
2341     std::vector<int> outShape = inpShape;
2342     outShape.insert(outShape.begin() + axis, 1);
2343     layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape";
2344     layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
2345     if (hasDynamicShapes)
2346     {
2347         std::vector<int> dynamicAxes;
2348         std::vector<int> inputIndices;
2349         for (int index = 0; index < outShape.size(); ++index) {
2350             if (index != axis)
2351                 dynamicAxes.push_back(index);
2352         }
2353         for (int index = 0; index < inpShape.size(); ++index)
2354             inputIndices.push_back(index);
2355         layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2356         layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2357     }
2358     addLayer(layerParams, node_proto);
2359 }
2360
// Imports an ONNX Expand node (two inputs: data and target shape).
// The target shape is read from the blob of the second input. The data input may be
// a constant blob or the output of a previous layer.
//  - Constant data: exactly one broadcast axis is supported; the expanded blob is
//    computed with cv::repeat and stored as a constant.
//  - Variable data: depending on the set of broadcast axes the node is mapped to a
//    "Scale" layer fed by a constant of ones, a "Concat" layer (after expandMid()),
//    or an "Identity" layer. Other combinations raise StsNotImplemented.
void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    // Local copy: inputs/outputs of the node are rewritten in some branches below.
    opencv_onnx::NodeProto node_proto = node_proto_;
    CV_CheckEQ(node_proto.input_size(), 2, "");
    const std::string& input0 = node_proto.input(0);
    const std::string& input1 = node_proto.input(1);
    Mat newShapeMat = getBlob(input1);
    // NOTE(review): reads the blob as 32-bit ints; presumably getBlob() yields CV_32S here - confirm.
    MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());

    MatShape inpShape;
    // The data input is "variable" when it is not registered among the constant blobs.
    bool haveVariables = constBlobs.find(input0) == constBlobs.end();
    if (haveVariables)
    {
        IterShape_t shapeIt = outShapes.find(input0);
        CV_Assert(shapeIt != outShapes.end());
        inpShape = shapeIt->second;
    }
    else
    {
        inpShape = shape(getBlob(input0));
    }

    String srcName = input0;
    // Unsqueeze and repeat along new axis
    if (targetShape.size() == inpShape.size() + 1)
    {
        // Prepend one unit dimension so that both shapes have the same rank.
        inpShape.insert(inpShape.begin(), targetShape.size() - inpShape.size(), 1);
        for (int i = 0; i < targetShape.size(); i++)
        {
            // Target entries of 1 or -1 take the extent of the (padded) input shape.
            if (abs(targetShape[i]) == 1)
                targetShape[i] = inpShape[i];
        }
        if (haveVariables)
        {
            // Materialize the extra leading dimension with a Reshape layer and use
            // its output as the source for the layers created below.
            LayerParams reshapeLp;
            reshapeLp.name = layerParams.name + "/reshape";
            reshapeLp.type = "Reshape";
            CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
            reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));

            opencv_onnx::NodeProto proto;
            proto.add_input(node_proto.input(0));
            proto.add_output(reshapeLp.name);
            addLayer(reshapeLp, proto);
            srcName = reshapeLp.name;
        }
    }
    CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");

    // Collect the axes where the input extent is 1 and the target extent differs.
    std::vector<int> broadcast_axes;
    // shapes aren't right-aligned here because targetShape.size() == inpShape.size()
    for (int i = 0; i < targetShape.size(); i++)
    {
        if (targetShape[i] != inpShape[i])
        {
            if (inpShape[i] == 1)
            {
                broadcast_axes.push_back(i);
            }
            else if (targetShape[i] != 1)
            {
                // Extents differ and neither side is 1: not broadcastable.
                CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
            }
        }
    }

    if (!haveVariables)
    {
        if (broadcast_axes.size() != 1)
            CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");

        Mat input = getBlob(node_proto, 0);
        // View the blob as 2D with one row per combination of the dims before the
        // broadcast axis, repeat every row horizontally, then restore the N-d shape.
        input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
        Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
        output = output.reshape(0, targetShape);
        addConstant(layerParams.name, output);
        return;
    }

    // Two broadcast axes that are adjacent and end at the last axis.
    if (broadcast_axes.size() == 2 &&
        broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
    {
        LayerParams constParams;
        constParams.name = layerParams.name + "/const";
        CV_Assert(layer_id.find(constParams.name) == layer_id.end());
        constParams.type = "Const";

        // Constant of ones sized by the shape blob of the node.
        Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F);
        constParams.blobs.push_back(inp);

        opencv_onnx::NodeProto proto;
        proto.add_output(constParams.name);
        addLayer(constParams, proto);

        // Scale layer without bias: first input is the ones constant, second the source.
        layerParams.type = "Scale";
        layerParams.set("bias_term", false);
        node_proto.set_input(0, constParams.name);
        node_proto.set_input(1, srcName);
    }
    else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
    {
        // NOTE(review): expandMid() is defined elsewhere; presumably it rewires
        // node_proto so the source is repeated the given number of times - confirm.
        expandMid(layerParams.name, node_proto, srcName, targetShape[broadcast_axes[0]]);

        layerParams.set("axis", broadcast_axes[0]);
        layerParams.type = "Concat";
        node_proto.set_output(0, layerParams.name);
    }
    else if (broadcast_axes.empty())
    {
        // Shapes already match: nothing to expand.
        layerParams.type = "Identity";
    }
    else
        CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
    addLayer(layerParams, node_proto);
}
2476
2477 void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2478 {
2479     CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
2480     int depth = layerParams.get<int>("depth", CV_32F);
2481     layerParams.type += (depth == CV_8S) ? "Int8" : "";
2482
2483     if (node_proto.input_size() == 2) {
2484         Mat blob = getBlob(node_proto, 1);
2485         CV_Assert(blob.type() == CV_32SC1);
2486
2487         layerParams.set("dim", DictValue::arrayInt<int*>(blob.ptr<int>(), blob.total()));
2488
2489         if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
2490             std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
2491             runLayer(layerParams, inputs, outputs);
2492             addConstant(layerParams.name, outputs[0]);
2493             return;
2494         }
2495     }
2496     else {
2497         DictValue shape = layerParams.get("shape");
2498         std::vector<int> dim;
2499         for (int j = 0; j < shape.size(); j++) {
2500             dim.push_back(shape.getIntValue(j));
2501         }
2502
2503         if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
2504             Mat input = getBlob(node_proto, 0);
2505             Mat out = input.reshape(0, dim);
2506             addConstant(layerParams.name, out);
2507             return;
2508         }
2509         replaceLayerParam(layerParams, "shape", "dim");
2510     }
2511     addLayer(layerParams, node_proto);
2512 }
2513
2514 void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2515 {
2516     int depth = layerParams.get<int>("depth", CV_32F);
2517     layerParams.type = (depth == CV_8S) ? "PaddingInt8" : "Padding";
2518     replaceLayerParam(layerParams, "mode", "type");
2519     if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
2520     {
2521         // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
2522         // We need to shuffle it to begin0, end0, begin1, end1, ...
2523         Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
2524         paddings = paddings.t();
2525         layerParams.set("paddings", DictValue::arrayInt(paddings.ptr<int>(), paddings.total()));
2526
2527         if (node_proto.input_size() == 3)
2528         {
2529             Mat value = getBlob(node_proto, 2);
2530             float padValue = (depth == CV_8S) ? (float)value.ptr<int8_t>()[0] : value.ptr<float>()[0];
2531             layerParams.set("value", padValue);
2532         }
2533     }
2534     addLayer(layerParams, node_proto);
2535 }
2536
2537 void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2538 {
2539     CV_Assert(node_proto.input_size() == 1);
2540     IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2541     CV_Assert(shapeIt != outShapes.end());
2542     const MatShape& inpShape = shapeIt->second;
2543
2544     int dims = static_cast<int>(inpShape.size());
2545     Mat shapeMat(dims, 1, CV_32S);
2546     bool isDynamicShape = false;
2547     for (int j = 0; j < dims; ++j)
2548     {
2549         int sz = inpShape[j];
2550         isDynamicShape |= (sz == 0);
2551         shapeMat.at<int>(j) = sz;
2552     }
2553     shapeMat.dims = 1;  // FIXIT Mat 1D
2554
2555     if (isDynamicShape)
2556     {
2557         CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
2558         // FIXIT repair assertion
2559         // Disabled to pass face detector tests from #20422
2560         // CV_Assert(!isDynamicShape);  // not supported
2561     }
2562     addConstant(layerParams.name, shapeMat);
2563 }
2564
2565 void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2566 {
2567     if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
2568     {
2569         Mat blob = getBlob(node_proto, 0);
2570         int type;
2571         switch (layerParams.get<int>("to"))
2572         {
2573             case opencv_onnx::TensorProto_DataType_FLOAT:   type = CV_32F; break;
2574             case opencv_onnx::TensorProto_DataType_UINT8:   type = CV_8U; break;
2575             case opencv_onnx::TensorProto_DataType_UINT16:  type = CV_16U; break;
2576             case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
2577             case opencv_onnx::TensorProto_DataType_INT8:
2578             case opencv_onnx::TensorProto_DataType_INT16:
2579             case opencv_onnx::TensorProto_DataType_INT32:
2580             case opencv_onnx::TensorProto_DataType_INT64:   type = CV_32S; break;
2581             default: type = blob.type();
2582         }
2583         Mat dst;
2584         blob.convertTo(dst, type);
2585         dst.dims = blob.dims;
2586         addConstant(layerParams.name, dst);
2587         return;
2588     }
2589     else
2590         layerParams.type = "Identity";
2591     addLayer(layerParams, node_proto);
2592 }
2593
2594 void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2595 {
2596     int depth = CV_32F;
2597     float fill_value;
2598     if (!layerParams.blobs.empty())
2599     {
2600         CV_Assert(!layerParams.has("value"));
2601         depth = layerParams.blobs[0].depth();
2602         Mat floats;
2603         layerParams.blobs[0].convertTo(floats, CV_32F);
2604         fill_value = floats.at<float>(0, 0);
2605     }
2606     else
2607         fill_value = layerParams.get("value", 0);
2608
2609     MatShape inpShape = getBlob(node_proto, 0);
2610     for (int i = 0; i < inpShape.size(); i++)
2611         CV_CheckGT(inpShape[i], 0, "");
2612     Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
2613     addConstant(layerParams.name, tensor);
2614 }
2615
2616 void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2617 {
2618     opencv_onnx::NodeProto node_proto = node_proto_;
2619     CV_Assert(node_proto.input_size() == 2);
2620     Mat indexMat = getBlob(node_proto, 1);
2621     CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
2622     int index = indexMat.at<int>(0);
2623     int axis = layerParams.get<int>("axis", 0);
2624
2625     if ((constBlobs.find(node_proto.input(0)) != constBlobs.end()))
2626     {
2627         Mat input = getBlob(node_proto, 0);
2628         Mat out;
2629         std::vector<cv::Range> ranges(input.dims, Range::all());
2630         ranges[axis] = Range(index, index + 1);
2631
2632         out = input(ranges);
2633         MatShape outShape = shape(out);
2634         if (outShape.size() > 1)
2635         {
2636             outShape.erase(outShape.begin() + axis);
2637             out.reshape(0, outShape);
2638         } else {
2639             out.dims = 1;
2640         }
2641         addConstant(layerParams.name, out);
2642         return;
2643     }
2644     else
2645     {
2646         IterShape_t shapeIt = outShapes.find(node_proto.input(0));
2647         CV_Assert(shapeIt != outShapes.end());
2648         MatShape inpShape = shapeIt->second;
2649
2650         LayerParams sliceLp;
2651         sliceLp.type = "Slice";
2652         sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name;
2653         std::vector<int> begin(inpShape.size(), 0);
2654         std::vector<int> end(inpShape.size(), -1);
2655         begin[axis] = index;
2656         end[axis] = index + 1;
2657
2658         cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size());
2659         cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
2660         sliceLp.set("begin", paramBegin);
2661         sliceLp.set("end", paramEnd);
2662         sliceLp.set("has_dynamic_shapes", hasDynamicShapes);
2663
2664         if (inpShape.size() > 1)
2665         {
2666             opencv_onnx::NodeProto proto;
2667             proto.add_input(node_proto.input(0));
2668             proto.add_output(sliceLp.name);
2669             addLayer(sliceLp, proto);
2670
2671             inpShape.erase(inpShape.begin() + axis);
2672             layerParams.type = "Reshape";
2673             layerParams.set("axis", 0);
2674             layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
2675             if (hasDynamicShapes)
2676             {
2677                 std::vector<int> dynamicAxes;
2678                 std::vector<int> inputIndices;
2679                 for (int index = 0; index < inpShape.size(); ++index)
2680                     dynamicAxes.push_back(index);
2681                 for (int index = 0; index < inpShape.size(); ++index)
2682                     inputIndices.push_back(index);
2683                 layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
2684                 layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
2685             }
2686             node_proto.set_input(0, sliceLp.name);
2687         }
2688         else
2689         {
2690             layerParams = sliceLp;
2691         }
2692     }
2693     addLayer(layerParams, node_proto);
2694 }
2695
2696 void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2697 {
2698     bool hasVariableInps = false;
2699     for (int i = 0; i < node_proto.input_size(); ++i)
2700     {
2701         if (layer_id.find(node_proto.input(i)) != layer_id.end())
2702         {
2703             hasVariableInps = true;
2704             break;
2705         }
2706     }
2707
2708     if (!hasVariableInps)
2709     {
2710         std::vector<Mat> inputs(node_proto.input_size()), concatenated;
2711         // Due constant folding we can get inputs with different number of dimensions
2712         // Insert the missing dimension to inputs
2713         MatShape inputShape;
2714         for (size_t i = 0; i < inputs.size(); ++i)
2715         {
2716             inputs[i] = getBlob(node_proto, i);
2717             if (inputs[i].size.dims() > inputShape.size())
2718             {
2719                 inputShape = shape(inputs[i]);
2720             }
2721         }
2722
2723         // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1
2724         int axis = layerParams.get<int>("axis", 1);
2725         for (size_t i = 0; i < inputs.size(); ++i)
2726         {
2727             MatShape targetShape = inputShape;
2728             targetShape[axis] = shape(inputs[i])[axis];
2729             CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
2730             inputs[i] = inputs[i].reshape(0, targetShape);
2731         }
2732         runLayer(layerParams, inputs, concatenated);
2733
2734         CV_Assert(concatenated.size() == 1);
2735         addConstant(layerParams.name, concatenated[0]);
2736         return;
2737     }
2738     else
2739     {
2740         for (int i = 0; i < node_proto.input_size(); ++i)
2741         {
2742             if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
2743             {
2744                 LayerParams constParams;
2745                 constParams.name = node_proto.input(i);
2746                 constParams.type = "Const";
2747                 constParams.blobs.push_back(getBlob(node_proto, i));
2748
2749                 opencv_onnx::NodeProto proto;
2750                 proto.add_output(constParams.name);
2751                 addLayer(constParams, proto);
2752             }
2753         }
2754     }
2755     addLayer(layerParams, node_proto);
2756 }
2757
2758 // https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
2759 void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2760 {
2761     for (int i = 1; i < node_proto.input_size(); i++)
2762         CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
2763
2764     int depth = layerParams.get<int>("depth", CV_32F);
2765     layerParams.type += (depth == CV_8S) ? "Int8" : "";
2766
2767     if (layerParams.has("coordinate_transformation_mode"))
2768     {
2769         String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
2770         CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
2771
2772         layerParams.set("align_corners", interp_mode == "align_corners");
2773         if (layerParams.get<String>("mode") == "linear")
2774         {
2775             layerParams.set("mode", interp_mode == "pytorch_half_pixel" || interp_mode == "half_pixel" ?
2776                                     "opencv_linear" : "bilinear");
2777         }
2778     }
2779     if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
2780         layerParams.set("mode", "opencv_linear");
2781
2782     // opset-10: input = [X, scales]
2783     // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
2784     int scalesInputId = node_proto.input_size() == 2 ? 1 : 2;
2785
2786     Mat scales = getBlob(node_proto, scalesInputId);
2787     if (!scales.empty())
2788     {
2789         CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
2790         layerParams.set("zoom_factor_y", scales.at<float>(2));
2791         layerParams.set("zoom_factor_x", scales.at<float>(3));
2792     }
2793     else if (node_proto.input_size() >= 4)  // opset-11
2794     {
2795         const std::string& inputSizes = node_proto.input(3);
2796         if (constBlobs.find(inputSizes) != constBlobs.end())
2797         {
2798             Mat shapes = getBlob(inputSizes);
2799             CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected");
2800             CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
2801             if (shapes.depth() == CV_32F)
2802                 shapes.convertTo(shapes, CV_32S);
2803             layerParams.set("width", shapes.at<int>(3));
2804             layerParams.set("height", shapes.at<int>(2));
2805         }
2806         else
2807         {
2808             CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str()));
2809         }
2810     }
2811     else
2812     {
2813         CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters");
2814     }
2815     replaceLayerParam(layerParams, "mode", "interpolation");
2816     addLayer(layerParams, node_proto);
2817 }
2818
2819 void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2820 {
2821     //fused from Resize Subgraph
2822     if (layerParams.has("coordinate_transformation_mode"))
2823     {
2824         String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
2825         CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
2826
2827         layerParams.set("align_corners", interp_mode == "align_corners");
2828         if (layerParams.get<String>("mode") == "linear")
2829         {
2830             layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
2831                                     "opencv_linear" : "bilinear");
2832         }
2833     }
2834     if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
2835         layerParams.set("mode", "opencv_linear");
2836
2837     layerParams.type = "Resize";
2838     if (layerParams.has("scales"))
2839     {
2840         // Pytorch layer
2841         DictValue scales = layerParams.get("scales");
2842         CV_Assert(scales.size() == 4);
2843         layerParams.set("zoom_factor_y", scales.getIntValue(2));
2844         layerParams.set("zoom_factor_x", scales.getIntValue(3));
2845     }
2846     else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
2847     {
2848         // Caffe2 layer
2849         replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
2850         replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
2851     }
2852     else
2853     {
2854         // scales as input
2855         const std::string& input1 = node_proto.input(1);
2856         if (constBlobs.find(input1) != constBlobs.end())
2857         {
2858             Mat scales = getBlob(input1);
2859             CV_Assert(scales.total() == 4);
2860             layerParams.set("zoom_factor_y", scales.at<float>(2));
2861             layerParams.set("zoom_factor_x", scales.at<float>(3));
2862         }
2863     }
2864     replaceLayerParam(layerParams, "mode", "interpolation");
2865     addLayer(layerParams, node_proto);
2866 }
2867
2868 void ONNXImporter::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2869 {
2870     const std::string& layer_type = node_proto.op_type();
2871     layerParams.type = "Softmax";
2872     layerParams.set("log_softmax", layer_type == "LogSoftmax");
2873     addLayer(layerParams, node_proto);
2874 }
2875
2876 void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
2877 {
2878     opencv_onnx::NodeProto node_proto = node_proto_;
2879     CV_CheckEQ(node_proto.input_size(), 3, "");
2880     if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
2881     {
2882         Mat priors = getBlob(node_proto, 2);
2883
2884         LayerParams constParams;
2885         constParams.name = layerParams.name + "/priors";
2886         constParams.type = "Const";
2887         constParams.blobs.push_back(priors);
2888
2889         opencv_onnx::NodeProto priorsProto;
2890         priorsProto.add_output(constParams.name);
2891         addLayer(constParams, priorsProto);
2892
2893         node_proto.set_input(2, constParams.name);
2894     }
2895     addLayer(layerParams, node_proto);
2896 }
2897
2898 void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2899 {
2900     layerParams.type = "CumSum";
2901
2902     // Get axis.
2903     const std::string& input1 = node_proto.input(1);
2904
2905     if (constBlobs.find(input1) != constBlobs.end())
2906     {
2907         Mat axis_blob = getBlob(input1);
2908         CV_Assert(axis_blob.total() == 1u);
2909         layerParams.set("axis", axis_blob.at<int>(0));
2910     }
2911
2912     addLayer(layerParams, node_proto);
2913 }
2914
2915 void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2916 {
2917     for (int j = 0; j < node_proto.input_size(); j++) {
2918         if (layer_id.find(node_proto.input(j)) == layer_id.end())
2919             layerParams.blobs.push_back(getBlob(node_proto, j));
2920     }
2921     addLayer(layerParams, node_proto);
2922 }
2923
2924 void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2925 {
2926     const std::string& name = layerParams.name;
2927     std::string& layer_type = layerParams.type;
2928     const std::string& layer_type_domain = node_proto.has_domain() ? node_proto.domain() : std::string();
2929     if (!layer_type_domain.empty() && layer_type_domain != str_domain_ai_onnx)
2930     {
2931         // append ONNX domain name
2932         static bool DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME = utils::getConfigurationParameterBool("OPENCV_DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME", true);
2933         if (DNN_CUSTOM_ONNX_TYPE_INCLUDE_DOMAIN_NAME)
2934         {
2935             layer_type = layer_type_domain + "." + layer_type;
2936         }
2937     }
2938
2939     CV_LOG_IF_INFO(NULL, !LayerFactory::isLayerRegistered(layer_type), "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
2940             << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
2941     );
2942
2943     parseSimpleLayers(layerParams, node_proto);
2944 }
2945
2946 void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2947 {
2948     CV_Assert(node_proto.input_size() == 3);
2949     layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
2950
2951     if (node_proto.op_type() == "DequantizeLinear")
2952     {
2953         Mat scale = getBlob(node_proto, 1);
2954         Mat zeropoint = getBlob(node_proto, 2);
2955
2956         layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
2957         layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
2958     }
2959     addLayer(layerParams, node_proto);
2960 }
2961
2962 void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2963 {
2964     int ninputs = node_proto.input_size();
2965     CV_Assert(ninputs == 8 || ninputs == 9);
2966
2967     Mat inp_sc = getBlob(node_proto, 1);
2968     Mat inp_zp = getBlob(node_proto, 2);
2969
2970     Mat weights = getBlob(node_proto, 3);
2971     int outCn = weights.size[0];
2972     Mat w_scale = getBlob(node_proto, 4);
2973     CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
2974     Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
2975
2976     Mat out_sc = getBlob(node_proto, 6);
2977     Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
2978
2979     Mat weights_2d = weights.reshape(1, outCn);
2980     Mat biasFused(1, outCn, CV_32S);
2981     Mat outputMultiplier(1, outCn, CV_32F);
2982     for (int i = 0; i < outCn; i++)
2983     {
2984         biasFused.at<int>(i) = bias.at<int>(i) - inp_zp.at<int8_t>(0)*(cv::sum(weights_2d.row(i))[0]);
2985         outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
2986     }
2987
2988     layerParams.type = "ConvolutionInt8";
2989     layerParams.set("num_output", outCn);
2990     layerParams.set("input_zeropoint", inp_zp.at<int8_t>(0));
2991     layerParams.blobs.push_back(weights);
2992     layerParams.blobs.push_back(biasFused);
2993     layerParams.blobs.push_back(outputMultiplier);
2994     addLayer(layerParams, node_proto);
2995 }
2996
2997 void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
2998 {
2999     int ninputs = node_proto.input_size();
3000     CV_Assert(ninputs == 8);
3001
3002     if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
3003         CV_Error(Error::StsNotImplemented, "Variable weights is not supported");
3004
3005     int firstInpDims = outShapes[node_proto.input(0)].size();
3006
3007     Mat inp_sc = getBlob(node_proto, 1);
3008     Mat inp_zp = getBlob(node_proto, 2);
3009
3010     Mat weights = getBlob(node_proto, 3).t();
3011     int outCn = weights.size[0];
3012     int secondInpDims = weights.dims;
3013
3014     Mat w_scale = getBlob(node_proto, 4);
3015     CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
3016     Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
3017     Mat out_sc = getBlob(node_proto, 6);
3018
3019     Mat bias(1, outCn, CV_32S);
3020     Mat outputMultiplier(1, outCn, CV_32F);
3021     for (int i = 0; i < outCn; i++)
3022     {
3023         bias.at<int>(i) = -inp_zp.at<int8_t>(0)*(cv::sum(weights.row(i))[0]);
3024         outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
3025     }
3026
3027     layerParams.type = "InnerProductInt8";
3028     layerParams.set("num_output", outCn);
3029     layerParams.set("axis", firstInpDims - secondInpDims + 1);
3030     layerParams.blobs.push_back(weights);
3031     layerParams.blobs.push_back(bias);
3032     layerParams.blobs.push_back(outputMultiplier);
3033     addLayer(layerParams, node_proto);
3034 }
3035
// Imports an element-wise quantized binary node with exactly 8 inputs.
// "QLinearAdd" is treated as a sum, any other op type routed here as a product.
//
// Input layout as read below:
//   0 = operand A, 1 = A scale, 2 = A zero point,
//   3 = operand B, 4 = B scale, 5 = B zero point,
//   6 = output scale, 7 = output zero point.
//
// Depending on whether one operand is a constant blob and on the operand
// shapes, the node becomes one of:
//   - a single "Requantize" layer (constant operand with exactly one element),
//   - a "ConstInt8" layer feeding an "EltwiseInt8" layer (constant operand with
//     the same shape as the other operand),
//   - a "ScaleInt8" layer with the dequantized constant stored as a blob,
//   - an "EltwiseInt8" layer (no constant operand, equal shapes),
//   - a "ScaleInt8" layer (no constant operand, different shapes).
void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
{
    // Local copy: a constant operand name may be replaced below.
    opencv_onnx::NodeProto node_proto = node_proto_;
    CV_Assert(node_proto.input_size() == 8);
    std::string op = (node_proto.op_type() == "QLinearAdd") ? "sum" : "prod";

    // Look for a constant operand. The loop visits i = 0 and i = 3 only; if
    // both operands are constant, the later one (3) wins.
    int constId = -1;
    for (int i = 0; i < 4; i += 3)
    {
        if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
            constId = i;
    }

    Mat inp_0_sc = getBlob(node_proto, 1);
    Mat inp_0_zp = getBlob(node_proto, 2);

    Mat inp_1_sc = getBlob(node_proto, 4);
    Mat inp_1_zp = getBlob(node_proto, 5);

    // Set 2nd input as the const input
    if (constId == 0)
    {
        cv::swap(inp_0_sc, inp_1_sc);
        cv::swap(inp_0_zp, inp_1_zp);
    }

    float out_sc = getBlob(node_proto, 6).at<float>(0);
    int8_t out_zp = getBlob(node_proto, 7).at<int8_t>(0);

    // Only the first element of each scale / zero-point blob is used.
    std::vector<float> inp_scales = {inp_0_sc.at<float>(0), inp_1_sc.at<float>(0)};
    std::vector<int8_t> inp_zps = {inp_0_zp.at<int8_t>(0), inp_1_zp.at<int8_t>(0)};

    // Coefficients and offset passed to the "EltwiseInt8" layer.
    std::vector<float> coeffs;
    float offset;
    if (op == "sum")
    {
        coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc};
        offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1];
    }
    else
    {
        coeffs = {inp_scales[0]/out_sc, inp_scales[1]};
        offset = out_zp;
    }

    if (constId != -1)
    {
        Mat blob = getBlob(node_proto, constId);
        if (blob.total() == 1)
        {
            // Single-element constant: the whole operation reduces to
            // out = scale * in + shift, emitted as one "Requantize" layer.
            // val is the dequantized value of the constant.
            float val = inp_scales[1] * (blob.at<int8_t>(0) - inp_zps[1]);
            float scale = inp_scales[0] / out_sc;
            if (op == "prod")
                scale *= val;

            float shift = out_zp - scale*inp_zps[0];
            if (op == "sum")
                shift += (val/out_sc);

            LayerParams rescaleParams;
            rescaleParams.name = layerParams.name;
            rescaleParams.type = "Requantize";
            rescaleParams.set("depth", CV_8S);
            rescaleParams.set("scale", scale);
            rescaleParams.set("shift", shift);
            addLayer(rescaleParams, node_proto);
            // layerParams itself is not added on this path.
            return;
        }
        else
        {
            // Shape recorded for the other (non-constant) operand:
            // 3 - constId maps 0 -> 3 and 3 -> 0.
            MatShape inpShape = outShapes[node_proto.input(3 - constId)];
            if (blob.dims == 2)
                blob = blob.t();

            if (shape(blob) == inpShape)
            {
                // Same shape: expose the constant as its own "ConstInt8" layer
                // and feed it to an "EltwiseInt8" layer.
                LayerParams constParams;
                constParams.name = layerParams.name + "/const";
                constParams.type = "ConstInt8";
                constParams.set("depth", CV_8S);
                constParams.set("scales", DictValue::arrayReal(inp_1_sc.ptr<float>(), 1));
                constParams.set("zeropoints", DictValue::arrayInt(inp_1_zp.ptr<int8_t>(), 1));
                constParams.blobs.push_back(blob);

                // The constant layer is registered directly (not via addLayer)
                // together with its output shape, then the node is rewired to it.
                int id = dstNet.addLayer(constParams.name, constParams.type, CV_8S, constParams);
                layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
                outShapes[constParams.name] = shape(blob);
                node_proto.set_input(constId, constParams.name);

                layerParams.type = "EltwiseInt8";
                layerParams.set("operation", op);
                layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
                layerParams.set("offset", offset);
            }
            else
            {
                // Different shape: use "ScaleInt8" with the dequantized
                // constant attached as a float blob.
                layerParams.type = "ScaleInt8";
                layerParams.set("bias_term", op == "sum");
                // Default axis is 1; if the constant is found among the graph
                // initializers, the axis becomes the rank difference between
                // the other operand and the initializer.
                int axis = 1;
                for (int i = 0; i < graph_proto.initializer_size(); i++)
                {
                    opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
                    if (tensor_proto.name() == node_proto.input(constId))
                    {
                        axis = inpShape.size() - tensor_proto.dims_size();
                        break;
                    }
                }
                layerParams.set("axis", axis);
                // Flatten to a single row, then dequantize: scale * (q - zero_point).
                blob = blob.reshape(1, 1);
                Mat blob_dequantized;
                blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1]));
                layerParams.blobs.push_back(blob_dequantized);
                layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
            }
        }
    }
    else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)])
    {
        // Two non-constant operands with identical recorded shapes.
        layerParams.type = "EltwiseInt8";
        layerParams.set("operation", op);
        layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
        layerParams.set("offset", offset);
    }
    else
    {
        // Two non-constant operands with different recorded shapes.
        layerParams.type = "ScaleInt8";
        layerParams.set("bias_term", op == "sum");
        layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
    }

    // Common to every path that did not return early.
    layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size()));
    addLayer(layerParams, node_proto);
}
3169
3170 void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3171 {
3172     CV_Assert(node_proto.input_size() == 5);
3173
3174     float slope = layerParams.get<float>("alpha");
3175     float inp_sc = getBlob(node_proto, 1).at<float>(0);
3176     int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3177     float out_sc = getBlob(node_proto, 3).at<float>(0);
3178     int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
3179
3180     Mat lookUpTable(1, 256, CV_8S);
3181     int8_t* table = lookUpTable.ptr<int8_t>();
3182     for (int i = -128; i < 128; i++)
3183     {
3184         float x = inp_sc*(i - inp_zp);
3185         float y = x >= 0.f ? x : slope*x;
3186         int quantized = out_zp + cvRound(y/out_sc);
3187         table[i+128] = saturate_cast<int8_t>(quantized);
3188     }
3189
3190     layerParams.type = "ReLUInt8";
3191     layerParams.blobs.push_back(lookUpTable);
3192     addLayer(layerParams, node_proto);
3193 }
3194
3195 void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3196 {
3197     CV_Assert(node_proto.input_size() == 5);
3198
3199     float inp_sc = getBlob(node_proto, 1).at<float>(0);
3200     int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3201     float out_sc = getBlob(node_proto, 3).at<float>(0);
3202     int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
3203
3204     Mat lookUpTable(1, 256, CV_8S);
3205     int8_t* table = lookUpTable.ptr<int8_t>();
3206     for (int i = -128; i < 128; i++)
3207     {
3208         float x = inp_sc*(i - inp_zp);
3209         float y = 1.f/(1.f + std::exp(-x));
3210         int quantized = out_zp + cvRound(y/out_sc);
3211         table[i+128] = saturate_cast<int8_t>(quantized);
3212     }
3213
3214     layerParams.type = "SigmoidInt8";
3215     layerParams.blobs.push_back(lookUpTable);
3216     addLayer(layerParams, node_proto);
3217 }
3218
3219 void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
3220 {
3221     CV_Assert(node_proto.input_size() == 5);
3222     float inp_sc = getBlob(node_proto, 1).at<float>(0);
3223     int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
3224     float out_sc = getBlob(node_proto, 3).at<float>(0);
3225
3226     layerParams.type = "PoolingInt8";
3227     layerParams.set("pool", "ave");
3228     layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool");
3229     layerParams.set("multiplier", inp_sc/out_sc);
3230     layerParams.set("input_zeropoint", inp_zp);
3231     addLayer(layerParams, node_proto);
3232 }
3233
3234 void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
3235 {
3236     opencv_onnx::NodeProto node_proto = node_proto_;
3237     layerParams.type = "ConcatInt8";
3238     int num_inputs = node_proto.input_size();
3239
3240     float out_scale = getBlob(node_proto, 0).at<float>(0);
3241     int out_zp = getBlob(node_proto, 1).at<int8_t>(0);
3242
3243     for (int i = 2; i < num_inputs; i += 3)
3244     {
3245         float inp_scale = getBlob(node_proto, i + 1).at<float>(0);
3246         int inp_zp = getBlob(node_proto, i + 2).at<int8_t>(0);
3247
3248         if (inp_scale != out_scale || inp_zp != out_zp)
3249         {
3250             float scale = inp_scale/out_scale;
3251             float shift = out_zp - scale*inp_zp;
3252
3253             if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3254             {
3255                 Mat blob = getBlob(node_proto, i);
3256                 Mat blob_rescaled;
3257                 blob.convertTo(blob_rescaled, CV_8S, scale, shift);
3258                 constBlobs[node_proto.input(i)] = blob_rescaled;
3259             }
3260             else
3261             {
3262                 LayerParams rescaleParams;
3263                 rescaleParams.name = node_proto.input(i) + "/rescale";
3264                 rescaleParams.type = "Requantize";
3265                 rescaleParams.set("depth", CV_8S);
3266                 rescaleParams.set("scale", scale);
3267                 rescaleParams.set("shift", shift);
3268
3269                 opencv_onnx::NodeProto proto;
3270                 proto.add_input(node_proto.input(i));
3271                 proto.add_output(rescaleParams.name);
3272                 addLayer(rescaleParams, proto);
3273                 node_proto.set_input(i, rescaleParams.name);
3274             }
3275         }
3276     }
3277
3278     bool hasVariableInps = false;
3279     for (int i = 2; i < num_inputs; i += 3)
3280     {
3281         if (layer_id.find(node_proto.input(i)) != layer_id.end())
3282         {
3283             hasVariableInps = true;
3284             break;
3285         }
3286     }
3287
3288     if (!hasVariableInps)
3289     {
3290         std::vector<Mat> inputs, concatenated;
3291         MatShape inputShape;
3292         for (size_t i = 2; i < num_inputs; i += 3)
3293         {
3294             Mat blob = getBlob(node_proto, i);
3295             if (blob.size.dims() > inputShape.size())
3296             {
3297                 inputShape = shape(blob);
3298             }
3299             inputs.push_back(blob);
3300         }
3301
3302         int axis = layerParams.get<int>("axis", 1);
3303         for (size_t i = 0; i < inputs.size(); ++i)
3304         {
3305             MatShape targetShape = inputShape;
3306             targetShape[axis] = shape(inputs[i])[axis];
3307             CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
3308             inputs[i] = inputs[i].reshape(0, targetShape);
3309         }
3310         runLayer(layerParams, inputs, concatenated);
3311         CV_Assert(concatenated.size() == 1);
3312         addConstant(layerParams.name, concatenated[0]);
3313         return;
3314     }
3315     else
3316     {
3317         for (int i = 2; i < num_inputs; i += 3)
3318         {
3319             if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
3320             {
3321                 LayerParams constParams;
3322                 constParams.name = node_proto.input(i);
3323                 constParams.type = "ConstInt8";
3324                 constParams.blobs.push_back(getBlob(node_proto, i));
3325                 constParams.set("depth", CV_8S);
3326
3327                 opencv_onnx::NodeProto proto;
3328                 proto.add_output(constParams.name);
3329                 addLayer(constParams, proto);
3330             }
3331         }
3332     }
3333     addLayer(layerParams, node_proto);
3334 }
3335
3336 // Domain: ai.onnx (default)
3337 // URL: https://github.com/onnx/onnx/blob/master/docs/Operators.md
3338 void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
3339 {
3340     CV_UNUSED(opset_version);
3341     DispatchMap dispatch;
3342
3343     dispatch["ArgMax"] = dispatch["ArgMin"] = &ONNXImporter::parseArg;
3344     dispatch["MaxUnpool"] = &ONNXImporter::parseMaxUnpool;
3345     dispatch["MaxPool"] = &ONNXImporter::parseMaxPool;
3346     dispatch["AveragePool"] = &ONNXImporter::parseAveragePool;
3347     dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] =
3348             dispatch["ReduceMax"] = &ONNXImporter::parseReduce;
3349     dispatch["Slice"] = &ONNXImporter::parseSlice;
3350     dispatch["Split"] = &ONNXImporter::parseSplit;
3351     dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias;
3352     dispatch["Pow"] = &ONNXImporter::parsePow;
3353     dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseMinMax;
3354     dispatch["Neg"] = &ONNXImporter::parseNeg;
3355     dispatch["Constant"] = &ONNXImporter::parseConstant;
3356     dispatch["LSTM"] = &ONNXImporter::parseLSTM;
3357     dispatch["GRU"] = &ONNXImporter::parseGRU;
3358     dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler;
3359     dispatch["Clip"] = &ONNXImporter::parseClip;
3360     dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu;
3361     dispatch["Relu"] = &ONNXImporter::parseRelu;
3362     dispatch["Elu"] = &ONNXImporter::parseElu;
3363     dispatch["Tanh"] = &ONNXImporter::parseTanh;
3364     dispatch["Abs"] = &ONNXImporter::parseAbs;
3365     dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = &ONNXImporter::parseCompare;
3366     dispatch["PRelu"] = &ONNXImporter::parsePRelu;
3367     dispatch["LRN"] = &ONNXImporter::parseLRN;
3368     dispatch["InstanceNormalization"] = &ONNXImporter::parseInstanceNormalization;
3369     dispatch["BatchNormalization"] = &ONNXImporter::parseBatchNormalization;
3370     dispatch["Gemm"] = &ONNXImporter::parseGemm;
3371     dispatch["MatMul"] = &ONNXImporter::parseMatMul;
3372     dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseMul;
3373     dispatch["Conv"] = &ONNXImporter::parseConv;
3374     dispatch["ConvTranspose"] = &ONNXImporter::parseConvTranspose;
3375     dispatch["Transpose"] = &ONNXImporter::parseTranspose;
3376     dispatch["Squeeze"] = &ONNXImporter::parseSqueeze;
3377     dispatch["Flatten"] = &ONNXImporter::parseFlatten;
3378     dispatch["Unsqueeze"] = &ONNXImporter::parseUnsqueeze;
3379     dispatch["Expand"] = &ONNXImporter::parseExpand;
3380     dispatch["Reshape"] = &ONNXImporter::parseReshape;
3381     dispatch["Pad"] = &ONNXImporter::parsePad;
3382     dispatch["Shape"] = &ONNXImporter::parseShape;
3383     dispatch["Cast"] = &ONNXImporter::parseCast;
3384     dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill;
3385     dispatch["Gather"] = &ONNXImporter::parseGather;
3386     dispatch["Concat"] = &ONNXImporter::parseConcat;
3387     dispatch["Resize"] = &ONNXImporter::parseResize;
3388     dispatch["Upsample"] = &ONNXImporter::parseUpsample;
3389     dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax;
3390     dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
3391     dispatch["CumSum"] = &ONNXImporter::parseCumSum;
3392
3393     std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
3394                                           "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
3395                                           "Identity", "Log", "Round", "Selu", "Sigmoid", "Sin", "Sinh", "Softmax",
3396                                           "Softplus", "Softsign", "Sqrt", "Tan", "ThresholdedRelu"};
3397     for (const auto& name : simpleLayers)
3398     {
3399         dispatch[name] = &ONNXImporter::parseSimpleLayers;
3400     }
3401
3402     // ai.onnx: opset 10+
3403     dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant;
3404     dispatch["QLinearConv"] = &ONNXImporter::parseQConv;
3405     dispatch["QLinearMatMul"] = &ONNXImporter::parseQMatMul;
3406
3407     domain_dispatch_map[str_domain_ai_onnx] = dispatch;
3408 }
3409
3410 // Domain: com.microsoft
3411 // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md
3412 void ONNXImporter::buildDispatchMap_COM_MICROSOFT(int opset_version)
3413 {
3414     CV_UNUSED(opset_version);
3415     DispatchMap dispatch;
3416
3417     dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter::parseQEltwise;
3418     dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter::parseQAvgPool;
3419     dispatch["QLinearLeakyRelu"] = &ONNXImporter::parseQLeakyRelu;
3420     dispatch["QLinearSigmoid"] = &ONNXImporter::parseQSigmoid;
3421     dispatch["QLinearConcat"] = &ONNXImporter::parseQConcat;
3422
3423     domain_dispatch_map["com.microsoft"] = dispatch;
3424 }
3425
3426
3427 Net readNetFromONNX(const String& onnxFile)
3428 {
3429     return detail::readNetDiagnostic<ONNXImporter>(onnxFile.c_str());
3430 }
3431
3432 Net readNetFromONNX(const char* buffer, size_t sizeBuffer)
3433 {
3434     return detail::readNetDiagnostic<ONNXImporter>(buffer, sizeBuffer);
3435 }
3436
3437 Net readNetFromONNX(const std::vector<uchar>& buffer)
3438 {
3439     return readNetFromONNX(reinterpret_cast<const char*>(buffer.data()), buffer.size());
3440 }
3441
3442 Mat readTensorFromONNX(const String& path)
3443 {
3444     std::fstream input(path.c_str(), std::ios::in | std::ios::binary);
3445     if (!input)
3446     {
3447         CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", path.c_str()));
3448     }
3449
3450     opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto();
3451     if (!tensor_proto.ParseFromIstream(&input))
3452     {
3453         CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX data: %s", path.c_str()));
3454     }
3455     Mat mat = getMatFromTensor(tensor_proto);
3456     releaseONNXTensor(tensor_proto);
3457     return mat;
3458 }
3459
3460 CV__DNN_INLINE_NS_END
3461 }} // namespace
3462
3463 #endif