// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>

#include <opencv2/core/utils/logger.defines.hpp>
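// Raising CV_LOG_STRIP_LEVEL to CV_LOG_LEVEL_DEBUG + 1 keeps the CV_LOG_DEBUG
// statements in this translation unit compiled in instead of stripped at build time.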
#undef CV_LOG_STRIP_LEVEL
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
#include <opencv2/core/utils/logger.hpp>
#ifdef HAVE_PROTOBUF

#include <iostream>
#include <fstream>
#include <string>
#include <limits>
#include <algorithm>

#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#include "opencv-onnx.pb.h"
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic pop
#endif

#include "onnx_graph_simplifier.hpp"
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

class ONNXImporter
{
    opencv_onnx::ModelProto model_proto;

    struct LayerInfo {
        int layerId;
        int outputId;
        LayerInfo(int _layerId = 0, int _outputId = 0) : layerId(_layerId), outputId(_outputId) {}
    };

    std::map<std::string, Mat> getGraphTensors(
                                    const opencv_onnx::GraphProto& graph_proto);
    Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
    Mat getBlob(const std::string& input_name);

    LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);
    bool isCeilMode(const LayerParams& layerParams);

    void addConstant(const std::string& name, const Mat& blob);
    void addLayer(LayerParams& layerParams,
                  const opencv_onnx::NodeProto& node_proto);

public:
    ONNXImporter(Net& net, const char *onnxFile)
        : dstNet(net)
    {
        CV_Assert(onnxFile);
        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);

        std::fstream input(onnxFile, std::ios::in | std::ios::binary);
        if (!input)
        {
            CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
        }

        if (!model_proto.ParseFromIstream(&input))
        {
            CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
        }

        populateNet();
    }
    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
        : dstNet(net)
    {
        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
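
        // A minimal std::streambuf over the caller's byte array: setg() points the
        // get area at the existing memory, so protobuf can parse the model through
        // an std::istream without copying the buffer.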
        struct _Buf : public std::streambuf
        {
            _Buf(const char* buffer, size_t sizeBuffer)
            {
                char* p = const_cast<char*>(buffer);
                setg(p, p, p + sizeBuffer);
            }
        };

        _Buf buf(buffer, sizeBuffer);
        std::istream input(&buf);

        if (!model_proto.ParseFromIstream(&input))
            CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");

        populateNet();
    }
    void populateNet();

protected:
    Net& dstNet;

    opencv_onnx::GraphProto graph_proto;
    std::string framework_name;

    std::map<std::string, Mat> constBlobs;

    std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
    typedef std::map<std::string, MatShape>::iterator IterShape_t;

    std::map<std::string, LayerInfo> layer_id;
    typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;

    void handleNode(const opencv_onnx::NodeProto& node_proto);
};
inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
{
    if (layerParams.has(oldKey)) {
        layerParams.set(newKey, layerParams.get(oldKey));
        layerParams.erase(oldKey);
    }
}

void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
{
    if (!tensor_proto.raw_data().empty()) {
        delete tensor_proto.release_raw_data();
    }
}
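
// Instantiate a layer and run it immediately on constant inputs. The importer
// uses this to fold subgraphs that operate on constant blobs (e.g. Reshape,
// Transpose or Slice of weights) into new constant blobs at import time.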
void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
              std::vector<Mat>& outputs)
{
    Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
    CV_Assert((bool)layer);

    std::vector<MatShape> inpShapes(inputs.size());
    int ddepth = CV_32F;
    for (size_t i = 0; i < inputs.size(); ++i)
    {
        inpShapes[i] = shape(inputs[i]);
        if (i > 0 && ddepth != inputs[i].depth())
            CV_Error(Error::StsNotImplemented, "Mixed input data types.");
        ddepth = inputs[i].depth();
    }

    std::vector<MatShape> outShapes, internalShapes;
    layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);

    std::vector<Mat> internals(internalShapes.size());
    outputs.resize(outShapes.size());
    for (size_t i = 0; i < outShapes.size(); ++i)
        outputs[i].create(outShapes[i], ddepth);
    for (size_t i = 0; i < internalShapes.size(); ++i)
        internals[i].create(internalShapes[i], ddepth);

    layer->finalize(inputs, outputs);
    layer->forward(inputs, outputs, internals);
}
std::map<std::string, Mat> ONNXImporter::getGraphTensors(
                                        const opencv_onnx::GraphProto& graph_proto)
{
    opencv_onnx::TensorProto tensor_proto;
    std::map<std::string, Mat> layers_weights;

    for (int i = 0; i < graph_proto.initializer_size(); i++)
    {
        tensor_proto = graph_proto.initializer(i);
        Mat mat = getMatFromTensor(tensor_proto);
        releaseONNXTensor(tensor_proto);
        layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
    }
    return layers_weights;
}
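
// ONNX stores integer attributes as int64, while DictValue keeps 32-bit ints,
// so repeated int64 fields are narrowed (with a range check) before use.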
static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
    std::vector<int32_t> dst(src.size());
    convertInt64ToInt32(src, dst, src.size());
    return DictValue::arrayInt(&dst[0], src.size());
}
LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
{
    LayerParams lp;
    for(int i = 0; i < node_proto.attribute_size(); i++)
    {
        opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
        std::string attribute_name = attribute_proto.name();

        if(attribute_name == "kernel_shape")
        {
            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
            lp.set("kernel_size", parse(attribute_proto.ints()));
        }
        else if(attribute_name == "strides")
        {
            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
            lp.set("stride", parse(attribute_proto.ints()));
        }
        else if(attribute_name == "pads")
        {
            if (node_proto.op_type() == "Pad")
            {
                // Padding layer.
                // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
                // We need to shuffle it to begin0, end0, begin1, end1, ...
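                // E.g. 2-D pads [top, left, bottom, right] become [top, bottom, left, right].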
                CV_Assert(attribute_proto.ints_size() % 2 == 0);
                const int dims = attribute_proto.ints_size() / 2;
                std::vector<int32_t> paddings;
                paddings.reserve(attribute_proto.ints_size());
                for (int i = 0; i < dims; ++i)
                {
                    paddings.push_back(attribute_proto.ints(i));
                    paddings.push_back(attribute_proto.ints(dims + i));
                }
                lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
            }
            else
            {
                // Convolution or pooling.
                CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
                lp.set("pad", parse(attribute_proto.ints()));
            }
        }
        else if(attribute_name == "auto_pad")
        {
            if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
                lp.set("pad_mode", "SAME");
            }
            else if (attribute_proto.s() == "VALID") {
                lp.set("pad_mode", "VALID");
            }
        }
        else if(attribute_name == "dilations")
        {
            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
            lp.set("dilation", parse(attribute_proto.ints()));
        }
        else if (attribute_proto.has_i())
        {
            ::google::protobuf::int64 src = attribute_proto.i();
            if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
                CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
            else
                lp.set(attribute_name, saturate_cast<int32_t>(src));
        }
        else if (attribute_proto.has_f())
        {
            lp.set(attribute_name, attribute_proto.f());
        }
        else if (attribute_proto.has_s())
        {
            lp.set(attribute_name, attribute_proto.s());
        }
        else if (attribute_proto.floats_size() > 0)
        {
            lp.set(attribute_name, DictValue::arrayReal(
                attribute_proto.floats().data(), attribute_proto.floats_size()));
        }
        else if (attribute_proto.ints_size() > 0)
        {
            lp.set(attribute_name, parse(attribute_proto.ints()));
        }
        else if (attribute_proto.has_t())
        {
            opencv_onnx::TensorProto tensor = attribute_proto.t();
            Mat blob = getMatFromTensor(tensor);
            lp.blobs.push_back(blob);
        }
        else if (attribute_proto.has_g())
        {
            CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
        }
        else if (attribute_proto.graphs_size() > 0)
        {
            CV_Error(Error::StsNotImplemented,
                     cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
                                attribute_name.c_str(), attribute_proto.graphs_size())
            );
        }
        else if (attribute_proto.strings_size() > 0)
        {
            std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
                                         attribute_name.c_str(), attribute_proto.strings_size());
            CV_LOG_ERROR(NULL, msg);
            for (int i = 0; i < attribute_proto.strings_size(); i++)
            {
                CV_LOG_ERROR(NULL, "    Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
            }
            CV_Error(Error::StsNotImplemented, msg);
        }
        else if (attribute_proto.tensors_size() > 0)
        {
            CV_Error(Error::StsNotImplemented,
                     cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
                                attribute_name.c_str(), attribute_proto.tensors_size())
            );
        }
        else
        {
            CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
        }
    }
    return lp;
}
Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index)
{
    CV_Assert(index < node_proto.input_size());
    const std::string& input_name = node_proto.input(index);
    return getBlob(input_name);
}

Mat ONNXImporter::getBlob(const std::string& input_name)
{
    std::map<std::string, Mat>::const_iterator constBlob = constBlobs.find(input_name);
    if (constBlob == constBlobs.end())
    {
        CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs");
    }
    return constBlob->second;
}
void ONNXImporter::addLayer(LayerParams& layerParams,
                            const opencv_onnx::NodeProto& node_proto)
{
    int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
    for (int i = 0; i < node_proto.output_size(); ++i)
    {
        layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
    }

    std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
    int inpNum = 0;
    for (int j = 0; j < node_proto.input_size(); j++)
    {
        const std::string& input_name = node_proto.input(j);
        IterLayerId_t layerId = layer_id.find(input_name);
        if (layerId != layer_id.end()) {
            dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
            ++inpNum;
            // Collect input shapes.
            IterShape_t shapeIt = outShapes.find(input_name);
            CV_Assert(shapeIt != outShapes.end());
            layerInpShapes.push_back(shapeIt->second);
        }
    }

    // Compute shape of output blob for this layer.
    Ptr<Layer> layer = dstNet.getLayer(id);  // FIXIT: avoid instantiation of layers during the import stage
    layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
    for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
    {
        outShapes[node_proto.output(i)] = layerOutShapes[i];
    }
}
void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
{
    constBlobs.insert(std::make_pair(name, blob));
    outShapes.insert(std::make_pair(name, shape(blob)));
}
void ONNXImporter::populateNet()
{
    CV_Assert(model_proto.has_graph());
    graph_proto = model_proto.graph();

    std::string framework_version;
    if (model_proto.has_producer_name())
        framework_name = model_proto.producer_name();
    if (model_proto.has_producer_version())
        framework_version = model_proto.producer_version();

    CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
            << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String())
            << " model produced by '" << framework_name << "'"
            << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
            << ". Number of nodes = " << graph_proto.node_size()
            << ", inputs = " << graph_proto.input_size()
            << ", outputs = " << graph_proto.output_size()
    );

    simplifySubgraphs(graph_proto);

    const int layersSize = graph_proto.node_size();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");

    constBlobs = getGraphTensors(graph_proto);
    // Add all input tensor shapes: this covers constant blobs as well as the network's real inputs.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_name());
        CV_Assert(valueInfoProto.has_type());
        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();

        MatShape inpShape(tensorShape.dim_size());
        for (int j = 0; j < inpShape.size(); ++j)
        {
            inpShape[j] = tensorShape.dim(j).dim_value();
        }
        if (!inpShape.empty())
        {
            inpShape[0] = std::max(inpShape[0], 1);  // It's OK to have an undetermined batch size
        }
        outShapes[valueInfoProto.name()] = inpShape;
    }
    // Create a map with the network inputs (without const blobs).
    // Fill the map: push layer name, layer id and output id.
    std::vector<String> netInputs;
    for (int j = 0; j < graph_proto.input_size(); j++)
    {
        const std::string& name = graph_proto.input(j).name();
        if (constBlobs.find(name) == constBlobs.end()) {
            netInputs.push_back(name);
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
    dstNet.setInputsNames(netInputs);

    for(int li = 0; li < layersSize; li++)
    {
        const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
        handleNode(node_proto);
    }

    CV_LOG_DEBUG(NULL, "DNN/ONNX: import completed!");
}
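
// Convert a single ONNX node into one or more OpenCV layers. Most operators are
// repacked into LayerParams of an existing OpenCV layer type; nodes whose inputs
// are all constant are evaluated eagerly and stored as constant blobs instead.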
void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
{
    opencv_onnx::NodeProto node_proto = node_proto_;  // TODO FIXIT

    CV_Assert(node_proto.output_size() >= 1);
    std::string name = node_proto.output(0);
    std::string layer_type = node_proto.op_type();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
            << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
    );

    try
    {
        // FIXIT not all cases can be repacked into "LayerParams". Importer should handle such cases directly for each "layer_type"
        LayerParams layerParams = getLayerParams(node_proto);

        layerParams.name = name;
        layerParams.type = layer_type;
        if (layer_type == "MaxPool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "MAX");
            layerParams.set("ceil_mode", layerParams.has("pad_mode"));
        }
        else if (layer_type == "AveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("ceil_mode", layerParams.has("pad_mode"));
            layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
        }
        else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" ||
                 layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")
        {
            CV_Assert(node_proto.input_size() == 1);
            layerParams.type = "Pooling";
            String pool;
            if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax")
                pool = "MAX";
            else if (layer_type == "ReduceSum")
                pool = "SUM";
            else
                pool = "AVE";
            layerParams.set("pool", pool);
            layerParams.set("global_pooling", !layerParams.has("axes"));
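            // A reduction over spatial axes is modelled as pooling: the kernel spans
            // the reduced axes, and a trailing Reshape produces the target shape
            // (with or without the kept 1-dims, depending on "keepdims").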
            if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
            {
                MatShape inpShape = outShapes[node_proto.input(0)];
                DictValue axes = layerParams.get("axes");
                bool keepdims = layerParams.get<int>("keepdims");
                MatShape targetShape = inpShape;
                for (int i = 0; i < axes.size(); i++) {
                    int axis = clamp(axes.get<int>(i), inpShape.size());
                    if (keepdims)
                        targetShape[axis] = 1;
                    else
                        targetShape.erase(targetShape.begin() + axis);
                }

                if (inpShape.size() == 3 && axes.size() <= 2)
                {
                    int axis = clamp(axes.get<int>(0), inpShape.size());
                    CV_CheckNE(axis, 0, "");

                    LayerParams reshapeLp;
                    reshapeLp.name = layerParams.name + "/reshape";
                    reshapeLp.type = "Reshape";
                    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
                    reshapeLp.set("axis", 0);
                    reshapeLp.set("num_axes", 1);
                    int newShape[] = {1, -1};
                    reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2));

                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(0));
                    proto.add_output(reshapeLp.name);
                    addLayer(reshapeLp, proto);

                    LayerParams avgLp;
                    avgLp.name = layerParams.name + "/avg";
                    avgLp.type = "Pooling";
                    CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
                    avgLp.set("pool", pool);
                    if (axes.size() == 2)
                    {
                        CV_CheckEQ(clamp(axes.get<int>(0), inpShape.size()), 1, "Unsupported mode");
                        CV_CheckEQ(clamp(axes.get<int>(1), inpShape.size()), 2, "Unsupported mode");
                        avgLp.set("global_pooling", true);
                    }
                    else
                    {
                        avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true);
                        avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1);
                    }

                    node_proto.set_input(0, reshapeLp.name);
                    node_proto.set_output(0, avgLp.name);
                    addLayer(avgLp, node_proto);
                }
                else
                {
                    if (inpShape.size() != 4 && inpShape.size() != 5)
                        CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation.");

                    CV_Assert(axes.size() <= inpShape.size() - 2);
                    std::vector<int> kernel_size(inpShape.size() - 2, 1);
                    for (int i = 0; i < axes.size(); i++) {
                        int axis = clamp(axes.get<int>(i), inpShape.size());
                        CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
                        kernel_size[axis - 2] = inpShape[axis];
                    }
                    LayerParams poolLp = layerParams;
                    poolLp.name = layerParams.name + "/avg";
                    CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
                    poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));

                    node_proto.set_output(0, poolLp.name);
                    addLayer(poolLp, node_proto);
                }

                layerParams.type = "Reshape";
                layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));

                node_proto.set_input(0, node_proto.output(0));
                node_proto.set_output(0, layerParams.name);
            }
            else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
            {
                CV_CheckEQ(layerParams.get<int>("keepdims"), 0, "layer only supports keepdims = false");
                LayerParams reshapeLp;
                reshapeLp.name = layerParams.name + "/reshape";
                reshapeLp.type = "Reshape";
                CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
                int newShape[] = {1, 1, 1, -1};
                reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4));

                opencv_onnx::NodeProto proto;
                proto.add_input(node_proto.input(0));
                proto.add_output(reshapeLp.name);
                addLayer(reshapeLp, proto);

                LayerParams poolLp = layerParams;
                poolLp.name = layerParams.name + "/pool";
                CV_Assert(layer_id.find(poolLp.name) == layer_id.end());

                node_proto.set_input(0, reshapeLp.name);
                node_proto.set_output(0, poolLp.name);
                addLayer(poolLp, node_proto);

                layerParams.type = "Reshape";
                int targetShape[] = {1};
                layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1));

                node_proto.set_input(0, node_proto.output(0));
                node_proto.set_output(0, layerParams.name);
            }
        }
        else if (layer_type == "Slice")
        {
            int axis = 0;
            std::vector<int> begin;
            std::vector<int> end;
            int inp_size = node_proto.input_size();
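
            // Slice-1 carries starts/ends/axes as node attributes (a single input);
            // Slice-10 and later pass them as extra constant inputs instead.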
            if (inp_size == 1)
            {
                if (layerParams.has("steps"))
                {
                    DictValue steps = layerParams.get("steps");
                    for (int i = 0; i < steps.size(); ++i)
                    {
                        if (steps.get<int>(i) != 1)
                            CV_Error(Error::StsNotImplemented,
                                     "Slice layer only supports steps = 1");
                    }
                }
                if (layerParams.has("axes")) {
                    DictValue axes = layerParams.get("axes");
                    for (int i = 1; i < axes.size(); ++i) {
                        CV_Assert(axes.get<int>(i - 1) == axes.get<int>(i) - 1);
                    }
                    axis = axes.get<int>(0);
                }

                DictValue starts = layerParams.get("starts");
                DictValue ends = layerParams.get("ends");
                CV_Assert(starts.size() == ends.size());

                if (axis > 0) {
                    begin.resize(axis, 0);
                    end.resize(axis, -1);
                }
                for (int i = 0; i < starts.size(); ++i)
                {
                    begin.push_back(starts.get<int>(i));
                    int finish = ends.get<int>(i);
                    end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
                }
            }
            else  // inp_size > 1
            {
                CV_Assert(inp_size >= 3);
                for (int i = 1; i < inp_size; i++) {
                    CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
                }
                Mat start_blob = getBlob(node_proto, 1);
                Mat end_blob = getBlob(node_proto, 2);
                CV_Assert(start_blob.total() == end_blob.total());

                if (inp_size > 3)
                {
                    Mat axes_blob = getBlob(node_proto, 3);
                    const int* axes = (int*)axes_blob.data;
                    for (int i = 1; i < axes_blob.total(); ++i) {
                        CV_Assert(axes[i - 1] == axes[i] - 1);
                    }
                    axis = axes[0];
                }

                const int* starts = start_blob.ptr<int>();
                const int* ends = end_blob.ptr<int>();
                if (axis > 0) {
                    begin.resize(axis, 0);
                    end.resize(axis, -1);
                }
                std::copy(starts, starts + start_blob.total(), std::back_inserter(begin));
                for (int i = 0; i < end_blob.total(); ++i)
                {
                    int finish = ends[i];
                    end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
                }

                if (inp_size == 5)
                {
                    CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end());
                    Mat step_blob = getBlob(node_proto, 4);

                    // Some exporters emit Slice with step -1 to reverse a tensor.
                    // Work around it here, but only for 2-D constant inputs.
                    if (constBlobs.find(node_proto.input(0)) != constBlobs.end() &&
                        axis == 0 &&
                        start_blob.at<int>(0) == -1 && step_blob.at<int>(0) == -1 &&
                        end_blob.at<int>(0) == std::numeric_limits<int32_t>::min())
                    {
                        Mat inp = getBlob(node_proto, 0);
                        if (inp.dims == 2)
                        {
                            Mat flipped;
                            flip(inp, flipped, 0);
                            addConstant(layerParams.name, flipped);
                            return;
                        }
                    }
                    CV_CheckEQ(countNonZero(step_blob != 1), 0, "Slice layer only supports steps = 1");
                }
            }
            layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size()));
            layerParams.set("end", DictValue::arrayInt(&end[0], end.size()));
            layerParams.set("axis", axis);

            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inp = getBlob(node_proto, 0);
                std::vector<Mat> inputs, sliced;
                inputs.push_back(inp);
                runLayer(layerParams, inputs, sliced);
                CV_Assert(sliced.size() == 1);
                addConstant(layerParams.name, sliced[0]);
                return;
            }
        }
        else if (layer_type == "Split")
        {
            if (layerParams.has("split"))
            {
                DictValue splits = layerParams.get("split");
                const int numSplits = splits.size();
                CV_Assert(numSplits > 1);

                std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
                for (int i = 1; i < splits.size() - 1; ++i)
                {
                    slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i);  // cumulative sum of split sizes
                }
                layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
            }
            else
            {
                layerParams.set("num_split", node_proto.output_size());
            }
            layerParams.type = "Slice";
        }
        else if (layer_type == "Add" || layer_type == "Sum" || layer_type == "Sub")
        {
            bool isSub = layer_type == "Sub";
            CV_CheckEQ(node_proto.input_size(), 2, "");
            bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
            bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
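            // Four cases: both inputs constant -> fold into a new constant; one
            // constant operand -> Power (scalar), Const + Eltwise (same shape) or
            // Scale (broadcast); two variables of equal shape -> Eltwise; otherwise
            // fall back to Scale with broadcasting.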
            if (is_const_0 && is_const_1)
            {
                Mat blob_0 = getBlob(node_proto, 0);
                Mat blob_1 = getBlob(node_proto, 1);
                CV_Assert(blob_0.size == blob_1.size);
                Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
                addConstant(layerParams.name, output);
                return;
            }
            else if (is_const_0 || is_const_1)
            {
                int const_blob_id = is_const_0 ? 0 : 1;
                Mat blob = getBlob(node_proto, const_blob_id);
                int blob_total = blob.total();
                if (blob_total == 1) {
                    layerParams.type = "Power";
                    layerParams.set("shift", (isSub ? -1 : 1) * blob.at<float>(0));
                }
                else {
                    MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)];
                    if (shape(blob) == inpShape)
                    {
                        LayerParams constParams;
                        constParams.name = layerParams.name + "/const";
                        constParams.type = "Const";
                        constParams.blobs.push_back((isSub ? -1 : 1) * blob);
                        int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
                        layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
                        outShapes[constParams.name] = shape(blob);

                        layerParams.type = "Eltwise";
                        node_proto.set_input(const_blob_id, constParams.name);
                    }
                    else
                    {
                        layerParams.type = "Scale";
                        layerParams.set("bias_term", true);
                        int axis = 1;
                        for (int i = 0; i < graph_proto.initializer_size(); i++)
                        {
                            opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
                            if (tensor_proto.name() == node_proto.input(const_blob_id))
                            {
                                axis = inpShape.size() - tensor_proto.dims_size();
                                break;
                            }
                        }
                        layerParams.set("axis", axis);
                        blob = blob.reshape(1, 1);
                        layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
                    }
                }
            }
            else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
            {
                layerParams.type = "Eltwise";
                if (isSub)
                {
                    static float subCoeffs[] = {1.f, -1.f};
                    layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
                }
            }
            else
            {
                if (isSub)
                {
                    LayerParams powerParams;
                    powerParams.name = layerParams.name + "/neg";
                    powerParams.type = "Power";
                    powerParams.set("scale", -1);

                    // Create Power layer
                    int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
                    // Connect to input
                    IterLayerId_t layerId = layer_id.find(node_proto.input(1));
                    CV_Assert(layerId != layer_id.end());
                    dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
                    // Add shape
                    layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
                    outShapes[powerParams.name] = outShapes[node_proto.input(1)];

                    // Replace input to Power
                    node_proto.set_input(1, powerParams.name);
                }
                layerParams.type = "Scale";
                layerParams.set("bias_term", true);
            }
        }
        else if (layer_type == "Pow")
        {
            if (layer_id.find(node_proto.input(1)) != layer_id.end())
                CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");

            Mat blob = getBlob(node_proto, 1);
            if (blob.total() != 1)
                CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");

            blob.convertTo(blob, CV_32F);
            layerParams.type = "Power";
            layerParams.set("power", blob.at<float>(0));
        }
        else if (layer_type == "Max")
        {
            layerParams.type = "Eltwise";
            layerParams.set("operation", "max");
        }
        else if (layer_type == "Neg")
        {
            layerParams.type = "Power";
            layerParams.set("scale", -1);
        }
        else if (layer_type == "Constant")
        {
            CV_Assert(node_proto.input_size() == 0);
            CV_Assert(layerParams.blobs.size() == 1);
            addConstant(layerParams.name, layerParams.blobs[0]);
            return;
        }
        else if (layer_type == "LSTM")
        {
            LayerParams lstmParams = layerParams;
            lstmParams.name += "/lstm";

            // https://pytorch.org/docs/stable/nn.html#lstm
            CV_Assert(node_proto.input_size() == 7);
            Mat Wx = getBlob(node_proto, 1);
            Mat Wh = getBlob(node_proto, 2);
            Mat b = getBlob(node_proto, 3);
            CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h");
            CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c");
            b = b.reshape(1, b.size[0]);

            const int numHidden = lstmParams.get<int>("hidden_size");
            const int numDirs = Wx.size[0];  // Is 1 for forward only and 2 for bidirectional LSTM.
            const int numFeatures = Wx.size[2];
            Mat bx = b.colRange(0, b.cols / 2);
            Mat bh = b.colRange(b.cols / 2, b.cols);
            b = bx + bh;
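
            // Reorder gate weights: swap the second and third of the four gate
            // blocks in Wx, Wh and the bias (ONNX packs the gates as i,o,f,c,
            // while OpenCV's LSTM layer expects the f and o blocks the other way).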
            for (int k = 0; k < numDirs; ++k)
            {
                float* WxData = Wx.ptr<float>(k);
                float* WhData = Wh.ptr<float>(k);
                float* biasData = b.ptr<float>(k);
                for (int j = 0; j < numHidden; ++j)
                {
                    for (int i = 0; i < numFeatures; ++i)
                    {
                        std::swap(WxData[(numHidden + j) * numFeatures + i],
                                  WxData[(numHidden * 2 + j) * numFeatures + i]);
                    }
                    for (int i = 0; i < numHidden; ++i)
                    {
                        std::swap(WhData[(numHidden + j) * numHidden + i],
                                  WhData[(numHidden * 2 + j) * numHidden + i]);
                    }
                    std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
                }
            }
            Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
            Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);

            lstmParams.blobs.resize(3);
            lstmParams.blobs[0] = Wh;
            lstmParams.blobs[1] = Wx;
            lstmParams.blobs[2] = b;
            lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");

            node_proto.set_output(0, lstmParams.name);  // set different name so output shapes will be registered on that name
            addLayer(lstmParams, node_proto);

            MatShape lstmShape = outShapes[node_proto.output(0)];

            // Add a fake dimension of 1, matching the ONNX LSTM output layout.
            lstmShape.insert(lstmShape.begin() + 1, 1);

            layerParams.type = "Reshape";
            layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
            node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
            node_proto.set_output(0, layerParams.name);  // keep origin LSTM's name
        }
        else if (layer_type == "ImageScaler")
        {
            const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
            layerParams.erase("scale");

            if (layerParams.has("bias"))
            {
                layerParams.type = "Scale";
                layerParams.blobs.push_back(
                    Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale));

                layerParams.set("bias_term", true);
                Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
                for (int j = 0; j < bias.total(); j++) {
                    bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
                }
                layerParams.blobs.push_back(bias);
                layerParams.erase("bias");
            }
            else {
                layerParams.set("scale", scale);
                layerParams.type = "Power";
            }
        }
        else if (layer_type == "Clip")
        {
            layerParams.type = "ReLU6";
            replaceLayerParam(layerParams, "min", "min_value");
            replaceLayerParam(layerParams, "max", "max_value");
        }
        else if (layer_type == "LeakyRelu")
        {
            layerParams.type = "ReLU";
            replaceLayerParam(layerParams, "alpha", "negative_slope");
        }
        else if (layer_type == "Relu")
        {
            layerParams.type = "ReLU";
        }
        else if (layer_type == "Elu")
        {
            layerParams.type = "ELU";
        }
        else if (layer_type == "Tanh")
        {
            layerParams.type = "TanH";
        }
        else if (layer_type == "PRelu")
        {
            layerParams.type = "PReLU";
            layerParams.blobs.push_back(getBlob(node_proto, 1));
        }
        else if (layer_type == "LRN")
        {
            replaceLayerParam(layerParams, "size", "local_size");
        }
        else if (layer_type == "InstanceNormalization")
        {
            if (node_proto.input_size() != 3)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias");

            layerParams.blobs.resize(4);
            layerParams.blobs[2] = getBlob(node_proto, 1);  // weightData
            layerParams.blobs[3] = getBlob(node_proto, 2);  // biasData
            layerParams.set("has_bias", true);
            layerParams.set("has_weight", true);

            // Get number of channels in input
            int size = layerParams.blobs[2].total();
            layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F);  // mean
            layerParams.blobs[1] = Mat::ones(size, 1, CV_32F);   // std

            LayerParams mvnParams;
            mvnParams.name = layerParams.name + "/MVN";
            mvnParams.type = "MVN";
            mvnParams.set("eps", layerParams.get<float>("epsilon"));
            layerParams.erase("epsilon");

            // Create MVN layer
            int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams);
            // Connect to input
            IterLayerId_t layerId = layer_id.find(node_proto.input(0));
            CV_Assert(layerId != layer_id.end());
            dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
            // Add shape
            layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0)));
            outShapes[mvnParams.name] = outShapes[node_proto.input(0)];

            // Replace Batch Norm's input to MVN
            node_proto.set_input(0, mvnParams.name);
            layerParams.type = "BatchNorm";
        }
        else if (layer_type == "BatchNormalization")
        {
            if (node_proto.input_size() != 5)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias, mean and var");

            layerParams.type = "BatchNorm";
            replaceLayerParam(layerParams, "epsilon", "eps");
            replaceLayerParam(layerParams, "spatial", "use_global_stats");

            Mat meanData = getBlob(node_proto, 3);
            Mat stdData = getBlob(node_proto, 4);

            layerParams.blobs.push_back(meanData);
            layerParams.blobs.push_back(stdData);

            if (!node_proto.input(1).empty()) {
                layerParams.set("has_weight", true);
                layerParams.blobs.push_back(getBlob(node_proto, 1));  // weightData
            } else {
                layerParams.set("has_weight", false);
            }

            if (!node_proto.input(2).empty()) {
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(getBlob(node_proto, 2));  // biasData
            } else {
                layerParams.set("has_bias", false);
            }
        }
        else if (layer_type == "Gemm")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "InnerProduct";
            Mat weights = getBlob(node_proto, 1);
            int ind_num_out = 0;
            if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
                transpose(weights, weights);
                ind_num_out = 1;
            }
            layerParams.blobs.push_back(weights);

            if (node_proto.input_size() == 3) {
                Mat bias = getBlob(node_proto, 2);
                layerParams.blobs.push_back(bias);
            }
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inputBuf = getBlob(node_proto, 0);

                LayerParams constParams;
                constParams.name = node_proto.input(0);
                constParams.type = "Const";
                constParams.blobs.push_back(inputBuf);

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(constParams, proto);
            }

            layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "MatMul")
        {
            CV_Assert(node_proto.input_size() == 2);
            layerParams.type = "InnerProduct";
            layerParams.set("bias_term", false);
            CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
            int firstInpDims = outShapes[node_proto.input(0)].size();
            int secondInpDims;

            if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
            {
                Mat blob = getBlob(node_proto, 1);
                secondInpDims = blob.dims;
                layerParams.blobs.push_back(blob.t());
                layerParams.set("num_output", layerParams.blobs[0].size[0]);
            } else {
                secondInpDims = outShapes[node_proto.input(1)].size();
            }
            layerParams.set("axis", firstInpDims - secondInpDims + 1);
        }
        else if (layer_type == "Mul" || layer_type == "Div")
        {
            CV_Assert(node_proto.input_size() == 2);

            bool isDiv = layer_type == "Div";
            int constId = -1;
            bool haveVariables = false;
            for (int i = 0; i < 2; ++i)
            {
                if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
                    constId = i;
                else
                    haveVariables = true;
            }
            if (constId != -1 && haveVariables)
            {
                Mat blob = getBlob(node_proto, constId);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    float coeff = isDiv ? 1.0 / blob.at<float>(0) : blob.at<float>(0);
                    layerParams.set("scale", coeff);
                    layerParams.type = "Power";
                }
                else {
                    if (isDiv)
                        divide(1.0, blob, blob);
                    layerParams.blobs.push_back(blob);
                    layerParams.type = "Scale";
                }
            }
            else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
            {
                layerParams.type = "Eltwise";
                layerParams.set("operation", isDiv ? "div" : "prod");
            }
            else
            {
                // The Scale layer allocates its output with the first input's shape,
                // so make the larger input come first.
                if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)]))
                {
                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(1));
                    proto.add_input(node_proto.input(0));
                    proto.add_output(layerParams.name);
                    node_proto = proto;
                }

                if (isDiv)
                {
                    LayerParams powerParams;
                    powerParams.name = layerParams.name + "/inv";
                    powerParams.type = "Power";
                    powerParams.set("power", -1);

                    // Create Power layer
                    int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
                    // Connect to input
                    IterLayerId_t layerId = layer_id.find(node_proto.input(1));
                    CV_Assert(layerId != layer_id.end());
                    dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
                    // Add shape
                    layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
                    outShapes[powerParams.name] = outShapes[node_proto.input(1)];

                    // Replace input to Power
                    node_proto.set_input(1, powerParams.name);
                }
                layerParams.type = "Scale";
            }

            if (!haveVariables)
            {
                Mat inp0 = getBlob(node_proto, 0);
                Mat inp1 = getBlob(node_proto, 1);
                if (inp0.size != inp1.size && inp1.total() != 1)
                    CV_Error(Error::StsNotImplemented, "Constant multiply with different shapes");

                Mat out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
                out = out.reshape(1, inp0.dims, inp0.size);
                out.dims = inp0.dims;  // to workaround dims == 1
                addConstant(layerParams.name, out);
                return;
            }
        }
        else if (layer_type == "Conv")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Convolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
                {
                    layerParams.blobs.push_back(getBlob(node_proto, j));
                }
            }
            int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
            layerParams.set("num_output", outCn);
        }
        else if (layer_type == "ConvTranspose")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Deconvolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
            layerParams.set("bias_term", node_proto.input_size() == 3);

            if (!layerParams.has("kernel_size"))
                CV_Error(Error::StsNotImplemented,
                         "Required attribute 'kernel_size' is not present.");

            if (layerParams.has("output_shape"))
            {
                const DictValue& outShape = layerParams.get("output_shape");
                DictValue strides = layerParams.get("stride");
                DictValue kernel = layerParams.get("kernel_size");

                String padMode;
                std::vector<int> adjust_pads;
                if (layerParams.has("pad_mode"))
                {
                    padMode = toUpperCase(layerParams.get<String>("pad_mode"));
                    if (padMode != "SAME" && padMode != "VALID")
                        CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
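
                    // Deconvolution output size is stride*(in - 1) + kernel - pads;
                    // "adj" absorbs the remainder so the requested output_shape is
                    // matched exactly for both SAME and VALID padding.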
                    for (int i = 0; i < strides.size(); i++)
                    {
                        int sz = outShape.get<int>(2 + i);
                        int stride = strides.get<int>(i);
                        adjust_pads.push_back(padMode == "SAME" ? (sz - 1) % stride :
                                                                  (sz - kernel.get<int>(i)) % stride);
                    }
                    layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
                }
            }
            else if (layerParams.has("output_padding"))
            {
                replaceLayerParam(layerParams, "output_padding", "adj");
            }
        }
        else if (layer_type == "Transpose")
        {
            layerParams.type = "Permute";
            replaceLayerParam(layerParams, "perm", "order");

            CV_Assert(node_proto.input_size() == 1);
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
                runLayer(layerParams, inputs, transposed);
                CV_Assert(transposed.size() == 1);
                addConstant(layerParams.name, transposed[0]);
                return;
            }
        }
        else if (layer_type == "Squeeze")
        {
            CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
            DictValue axes_dict = layerParams.get("axes");
            MatShape inpShape = outShapes[node_proto.input(0)];

            std::vector<bool> maskedAxes(inpShape.size(), false);
            for (int i = 0; i < axes_dict.size(); ++i)
            {
                int axis = axes_dict.getIntValue(i);
                CV_CheckLE(axis, static_cast<int>(inpShape.size()), "Squeeze axis");
                maskedAxes[axis] = inpShape[axis] == 1;
            }
            MatShape outShape;
            for (int i = 0; i < inpShape.size(); ++i)
            {
                if (!maskedAxes[i])
                    outShape.push_back(inpShape[i]);
            }
            if (outShape.size() != inpShape.size())
            {
                layerParams.type = "Reshape";
                layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
            }
            else
                layerParams.type = "Identity";

            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inp = getBlob(node_proto, 0);
                Mat out = inp.reshape(1, outShape);
                out.dims = outShape.size();  // to workaround dims == 1
                addConstant(layerParams.name, out);
                return;
            }
        }
        else if (layer_type == "Flatten")
        {
            CV_CheckEQ(node_proto.input_size(), 1, "");
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat input = getBlob(node_proto, 0);
                int axis = clamp(layerParams.get<int>("axis", 1), input.dims);

                std::vector<int> out_size(&input.size[0], &input.size[0] + axis);
                out_size.push_back(input.total(axis));
                Mat output = input.reshape(1, out_size);
                addConstant(layerParams.name, output);
                return;
            }
        }
        else if (layer_type == "Unsqueeze")
        {
            CV_Assert(node_proto.input_size() == 1);
            DictValue axes = layerParams.get("axes");
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                // Constant input: insert the new 1-dims and fold immediately.
                Mat input = getBlob(node_proto, 0);

                std::vector<int> dims;
                for (int j = 0; j < input.dims; j++) {
                    dims.push_back(input.size[j]);
                }
                CV_Assert(axes.getIntValue(axes.size() - 1) <= dims.size());
                for (int j = 0; j < axes.size(); j++) {
                    dims.insert(dims.begin() + axes.getIntValue(j), 1);
                }

                Mat out = input.reshape(0, dims);
                addConstant(layerParams.name, out);
                return;
            }

            if (axes.size() != 1)
                CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");

            MatShape inpShape = outShapes[node_proto.input(0)];
            int axis = axes.getIntValue(0);
            CV_Assert(0 <= axis && axis <= inpShape.size());
            std::vector<int> outShape = inpShape;
            outShape.insert(outShape.begin() + axis, 1);
            layerParams.type = "Reshape";
            layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
        }
        else if (layer_type == "Expand")
        {
            CV_CheckEQ(node_proto.input_size(), 2, "");
            const std::string& input0 = node_proto.input(0);
            const std::string& input1 = node_proto.input(1);
            Mat newShapeMat = getBlob(input1);
            MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());

            MatShape inpShape;
            bool haveVariables = constBlobs.find(input0) == constBlobs.end();
            if (haveVariables)
            {
                IterShape_t shapeIt = outShapes.find(input0);
                CV_Assert(shapeIt != outShapes.end());
                inpShape = shapeIt->second;
            }
            else
            {
                inpShape = shape(getBlob(input0));
            }

            String srcName = input0;
            // Unsqueeze and repeat along new axis
            if (targetShape.size() == inpShape.size() + 1)
            {
                for (int i = 0; i < targetShape.size(); i++)
                {
                    if (targetShape[i] == -1 && i < inpShape.size())
                        targetShape[i] = inpShape[i];
                    else if (i < inpShape.size() && targetShape[i] != inpShape[i])
                        inpShape.insert(inpShape.begin() + i, 1);
                }

                LayerParams reshapeLp;
                reshapeLp.name = layerParams.name + "/reshape";
                reshapeLp.type = "Reshape";
                CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
                reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));

                opencv_onnx::NodeProto proto;
                proto.add_input(node_proto.input(0));
                proto.add_output(reshapeLp.name);
                addLayer(reshapeLp, proto);
                srcName = reshapeLp.name;
            }

            CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");

            std::vector<int> broadcast_axes;
            for (int i = 0; i < targetShape.size(); i++)
            {
                if (targetShape[i] != inpShape[i])
                {
                    if (inpShape[i] == 1)
                        broadcast_axes.push_back(i);
                    else
                        CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
                }
            }

            if (!haveVariables)
            {
                if (broadcast_axes.size() != 1)
                    CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");

                Mat input = getBlob(node_proto, 0);
                input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
                Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
                output = output.reshape(0, targetShape);
                addConstant(layerParams.name, output);
                return;
            }
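
            // Variable input: broadcasting across the two innermost axes is expressed
            // as a Scale layer applied to a constant tensor of ones; broadcasting a
            // single outer axis duplicates the input via Identity layers plus Concat.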
            if (broadcast_axes.size() == 2 &&
                broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
            {
                LayerParams constParams;
                constParams.name = layerParams.name + "/const";
                CV_Assert(layer_id.find(constParams.name) == layer_id.end());
                constParams.type = "Const";

                Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F);
                constParams.blobs.push_back(inp);

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(constParams, proto);

                layerParams.type = "Scale";
                layerParams.set("bias_term", false);
                node_proto.set_input(0, constParams.name);
                node_proto.set_input(1, srcName);
            }
            else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
            {
                String base_name = layerParams.name + "/copy_";
                std::vector<std::string> input_names;
                for (int j = 0; j < targetShape[broadcast_axes[0]]; j++)
                {
                    std::ostringstream ss;
                    ss << j;
                    LayerParams copyLP;
                    copyLP.name = base_name + ss.str();
                    copyLP.type = "Identity";
                    CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
                    input_names.push_back(copyLP.name);

                    node_proto.set_input(0, srcName);
                    node_proto.set_output(0, copyLP.name);
                    addLayer(copyLP, node_proto);
                }
                node_proto.clear_input();
                for (int i = 0; i < input_names.size(); i++)
                {
                    node_proto.add_input(input_names[i]);
                }
                layerParams.set("axis", broadcast_axes[0]);
                layerParams.type = "Concat";
                node_proto.set_output(0, layerParams.name);
            }
            else
                CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
        }
        else if (layer_type == "Reshape")
        {
            CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));

            if (node_proto.input_size() == 2) {
                Mat blob = getBlob(node_proto, 1);
                CV_Assert(blob.type() == CV_32SC1);

                layerParams.set("dim", DictValue::arrayInt<int*>(
                            blob.ptr<int>(), blob.total() ));

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
                    runLayer(layerParams, inputs, outputs);
                    addConstant(layerParams.name, outputs[0]);
                    return;
                }
            }
            else {
                DictValue shape = layerParams.get("shape");
                std::vector<int> dim;
                for (int j = 0; j < shape.size(); j++) {
                    dim.push_back(shape.getIntValue(j));
                }

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, 0);
                    Mat out = input.reshape(0, dim);
                    addConstant(layerParams.name, out);
                    return;
                }
                replaceLayerParam(layerParams, "shape", "dim");
            }
        }
        else if (layer_type == "Pad")
        {
            layerParams.type = "Padding";
            replaceLayerParam(layerParams, "mode", "type");
            if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
            {
                // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
                // We need to shuffle it to begin0, end0, begin1, end1, ...
                Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
                paddings = paddings.t();
                layerParams.set("paddings", DictValue::arrayInt(paddings.ptr<int>(), paddings.total()));

                if (node_proto.input_size() == 3)
                {
                    Mat value = getBlob(node_proto, 2);
                    layerParams.set("value", value.at<float>(0));
                }
            }
        }
        else if (layer_type == "Shape")
        {
            CV_Assert(node_proto.input_size() == 1);
            IterShape_t shapeIt = outShapes.find(node_proto.input(0));
            CV_Assert(shapeIt != outShapes.end());
            const MatShape& inpShape = shapeIt->second;

            Mat shapeMat(inpShape.size(), 1, CV_32S);
            for (int j = 0; j < inpShape.size(); ++j)
                shapeMat.at<int>(j) = inpShape[j];
            shapeMat.dims = 1;

            addConstant(layerParams.name, shapeMat);
            return;
        }
        else if (layer_type == "Cast")
        {
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat blob = getBlob(node_proto, 0);
                int type;
                switch (layerParams.get<int>("to"))
                {
                    case opencv_onnx::TensorProto_DataType_FLOAT:   type = CV_32F; break;
                    case opencv_onnx::TensorProto_DataType_UINT8:   type = CV_8U; break;
                    case opencv_onnx::TensorProto_DataType_UINT16:  type = CV_16U; break;
                    case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
                    case opencv_onnx::TensorProto_DataType_INT8:
                    case opencv_onnx::TensorProto_DataType_INT16:
                    case opencv_onnx::TensorProto_DataType_INT32:
                    case opencv_onnx::TensorProto_DataType_INT64:   type = CV_32S; break;
                    default: type = blob.type();
                }
                Mat dst;
                blob.convertTo(dst, type);
                dst.dims = blob.dims;
                addConstant(layerParams.name, dst);
                return;
            }
            else
                layerParams.type = "Identity";
        }
        else if (layer_type == "ConstantOfShape" || layer_type == "ConstantFill")
        {
            int depth = CV_32F;
            float fill_value;
            if (!layerParams.blobs.empty())
            {
                CV_Assert(!layerParams.has("value"));
                depth = layerParams.blobs[0].depth();
                Mat floats;
                layerParams.blobs[0].convertTo(floats, CV_32F);
                fill_value = floats.at<float>(0, 0);
            }
            else
                fill_value = layerParams.get("value", 0);

            MatShape inpShape = getBlob(node_proto, 0);
            for (int i = 0; i < inpShape.size(); i++)
                CV_CheckGT(inpShape[i], 0, "");
            Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
            addConstant(layerParams.name, tensor);
            return;
        }
        else if (layer_type == "Gather")
        {
            CV_Assert(node_proto.input_size() == 2);
            Mat indexMat = getBlob(node_proto, 1);
            CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
            int index = indexMat.at<int>(0);
            int axis = layerParams.get<int>("axis", 0);

            if ((constBlobs.find(node_proto.input(0)) != constBlobs.end()))
            {
                Mat input = getBlob(node_proto, 0);
                Mat out;
                std::vector<cv::Range> ranges(input.dims, Range::all());
                ranges[axis] = Range(index, index + 1);

                out = input(ranges);
                MatShape outShape = shape(out);
                if (outShape.size() > 1)
                {
                    outShape.erase(outShape.begin() + axis);
                    out = out.reshape(0, outShape);  // Mat::reshape returns a new header; keep the result
                }

                addConstant(layerParams.name, out);
                return;
            }
            else
            {
                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
                CV_Assert(shapeIt != outShapes.end());
                MatShape inpShape = shapeIt->second;

                LayerParams sliceLp;
                sliceLp.type = "Slice";
                sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name;
                std::vector<int> begin(inpShape.size(), 0);
                std::vector<int> end(inpShape.size(), -1);
                begin[axis] = index;
                end[axis] = index + 1;

                cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size());
                cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
                sliceLp.set("begin", paramBegin);
                sliceLp.set("end", paramEnd);

                if (inpShape.size() > 1)
                {
                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(0));
                    proto.add_output(sliceLp.name);
                    addLayer(sliceLp, proto);

                    inpShape.erase(inpShape.begin() + axis);
                    layerParams.type = "Reshape";
                    layerParams.set("axis", 0);
                    layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
                    node_proto.set_input(0, sliceLp.name);
                }
                else
                {
                    layerParams = sliceLp;
                }
            }
        }
        else if (layer_type == "Concat")
        {
            bool hasVariableInps = false;
            for (int i = 0; i < node_proto.input_size(); ++i)
            {
                if (layer_id.find(node_proto.input(i)) != layer_id.end())
                {
                    hasVariableInps = true;
                    break;
                }
            }

            if (!hasVariableInps)
            {
                std::vector<Mat> inputs(node_proto.input_size()), concatenated;
                for (size_t i = 0; i < inputs.size(); ++i)
                {
                    inputs[i] = getBlob(node_proto, i);
                }
                runLayer(layerParams, inputs, concatenated);

                CV_Assert(concatenated.size() == 1);
                addConstant(layerParams.name, concatenated[0]);
                return;
            }
        }
        else if (layer_type == "Resize")
        {
            for (int i = 1; i < node_proto.input_size(); i++)
                CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());

            String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
            CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");

            layerParams.set("align_corners", interp_mode == "align_corners");
            Mat shapes = getBlob(node_proto, node_proto.input_size() - 1);
            CV_CheckEQ(shapes.size[0], 4, "");
            CV_CheckEQ(shapes.size[1], 1, "");
            CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
            if (shapes.depth() == CV_32F)
                shapes.convertTo(shapes, CV_32S);
            int height = shapes.at<int>(2);
            int width = shapes.at<int>(3);
            if (node_proto.input_size() == 3)
            {
                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
                CV_Assert(shapeIt != outShapes.end());
                MatShape scales = shapeIt->second;
                height *= scales[2];
                width *= scales[3];
            }
            layerParams.set("width", width);
            layerParams.set("height", height);

            if (layerParams.get<String>("mode") == "linear") {
                layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
                                        "opencv_linear" : "bilinear");
            }
            replaceLayerParam(layerParams, "mode", "interpolation");
        }
        else if (layer_type == "Upsample")
        {
            // fused from the Resize subgraph
            if (layerParams.has("coordinate_transformation_mode"))
            {
                String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
                CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");

                layerParams.set("align_corners", interp_mode == "align_corners");
                if (layerParams.get<String>("mode") == "linear")
                {
                    layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
                                            "opencv_linear" : "bilinear");
                }
            }
            if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
                layerParams.set("mode", "opencv_linear");

            layerParams.type = "Resize";
            if (layerParams.has("scales"))
            {
                // PyTorch layer
                DictValue scales = layerParams.get("scales");
                CV_Assert(scales.size() == 4);
                layerParams.set("zoom_factor_y", scales.getIntValue(2));
                layerParams.set("zoom_factor_x", scales.getIntValue(3));
            }
            else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
            {
                // Caffe2 layer
                replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
                replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
            }
            else
            {
                // scales as input
                Mat scales = getBlob(node_proto, 1);
                CV_Assert(scales.total() == 4);
                layerParams.set("zoom_factor_y", scales.at<float>(2));
                layerParams.set("zoom_factor_x", scales.at<float>(3));
            }
            replaceLayerParam(layerParams, "mode", "interpolation");
        }
        else if (layer_type == "SoftMax" || layer_type == "LogSoftmax")
        {
            layerParams.type = "Softmax";
            layerParams.set("log_softmax", layer_type == "LogSoftmax");
        }
        else if (layer_type == "DetectionOutput")
        {
            CV_CheckEQ(node_proto.input_size(), 3, "");
            if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
            {
                Mat priors = getBlob(node_proto, 2);

                LayerParams constParams;
                constParams.name = layerParams.name + "/priors";
                constParams.type = "Const";
                constParams.blobs.push_back(priors);

                opencv_onnx::NodeProto priorsProto;
                priorsProto.add_output(constParams.name);
                addLayer(constParams, priorsProto);

                node_proto.set_input(2, constParams.name);
            }
        }
        else
        {
            for (int j = 0; j < node_proto.input_size(); j++) {
                if (layer_id.find(node_proto.input(j)) == layer_id.end())
                    layerParams.blobs.push_back(getBlob(node_proto, j));
            }
        }
        addLayer(layerParams, node_proto);
    }
    catch (const cv::Exception& e)
    {
        CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
                << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
        );
        for (int i = 0; i < node_proto.input_size(); i++)
        {
            CV_LOG_INFO(NULL, "    Input[" << i << "] = '" << node_proto.input(i) << "'");
        }
        for (int i = 0; i < node_proto.output_size(); i++)
        {
            CV_LOG_INFO(NULL, "    Output[" << i << "] = '" << node_proto.output(i) << "'");
        }
        CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what()));
    }
}
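
// Public entry points. Typical usage (the file name below is illustrative):
//     cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");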
Net readNetFromONNX(const String& onnxFile)
{
    Net net;
    ONNXImporter onnxImporter(net, onnxFile.c_str());
    return net;
}

Net readNetFromONNX(const char* buffer, size_t sizeBuffer)
{
    Net net;
    ONNXImporter onnxImporter(net, buffer, sizeBuffer);
    return net;
}

Net readNetFromONNX(const std::vector<uchar>& buffer)
{
    return readNetFromONNX(reinterpret_cast<const char*>(buffer.data()), buffer.size());
}
Mat readTensorFromONNX(const String& path)
{
    std::fstream input(path.c_str(), std::ios::in | std::ios::binary);
    if (!input)
    {
        CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", path.c_str()));
    }

    opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto();
    if (!tensor_proto.ParseFromIstream(&input))
    {
        CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX data: %s", path.c_str()));
    }
    Mat mat = getMatFromTensor(tensor_proto);
    releaseONNXTensor(tensor_proto);
    return mat;
}

CV__DNN_INLINE_NS_END
}}  // namespace cv::dnn

#endif  // HAVE_PROTOBUF