#pragma GCC diagnostic pop
#endif
+#include "onnx_graph_simplifier.hpp"
+
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
}
}
-template<typename T1, typename T2>
-void convertInt64ToInt32(const T1& src, T2& dst, int size)
-{
- for (int i = 0; i < size; i++) {
- if (src[i] < std::numeric_limits<int32_t>::min() || src[i] > std::numeric_limits<int32_t>::max()) {
- CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
- }
- dst[i] = saturate_cast<int32_t>(src[i]);
- }
-}
-
-Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
-{
- CV_Assert(!tensor_proto.raw_data().empty() || !tensor_proto.float_data().empty()
- || !tensor_proto.double_data().empty() || !tensor_proto.int64_data().empty());
-
- opencv_onnx::TensorProto_DataType datatype = tensor_proto.data_type();
- Mat blob;
- std::vector<int> sizes;
- for (int i = 0; i < tensor_proto.dims_size(); i++) {
- sizes.push_back(tensor_proto.dims(i));
- }
- if (sizes.empty())
- sizes.assign(1, 1);
- if (datatype == opencv_onnx::TensorProto_DataType_FLOAT) {
-
- if (!tensor_proto.float_data().empty()) {
- const ::google::protobuf::RepeatedField<float> field = tensor_proto.float_data();
- Mat(sizes, CV_32FC1, (void*)field.data()).copyTo(blob);
- }
- else {
- char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
- Mat(sizes, CV_32FC1, val).copyTo(blob);
- }
- }
- else if (datatype == opencv_onnx::TensorProto_DataType_DOUBLE)
- {
- const ::google::protobuf::RepeatedField<double> field = tensor_proto.double_data();
- CV_Assert(!field.empty());
- Mat(sizes, CV_64FC1, (void*)field.data()).convertTo(blob, CV_32FC1);
- }
- else if (datatype == opencv_onnx::TensorProto_DataType_INT64)
- {
- blob.create(sizes, CV_32SC1);
- int32_t* dst = reinterpret_cast<int32_t*>(blob.data);
-
- if (!tensor_proto.int64_data().empty()) {
- ::google::protobuf::RepeatedField< ::google::protobuf::int64> src = tensor_proto.int64_data();
- convertInt64ToInt32(src, dst, blob.total());
- }
- else
- {
- char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
- int64_t* src = reinterpret_cast<int64_t*>(val);
- convertInt64ToInt32(src, dst, blob.total());
- }
- }
- else
- CV_Error(Error::StsUnsupportedFormat, "Unsupported data type: " +
- opencv_onnx::TensorProto_DataType_Name(datatype));
- if (tensor_proto.dims_size() == 0)
- blob.dims = 1; // To force 1-dimensional cv::Mat for scalars.
- return blob;
-}
-
void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
std::vector<Mat>& outputs)
{
{
CV_Assert(model_proto.has_graph());
opencv_onnx::GraphProto graph_proto = model_proto.graph();
+
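+ // Fuse known ONNX subgraph patterns into single nodes before the
+ // per-node import loop (see onnx_graph_simplifier.hpp).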
+ simplifySubgraphs(graph_proto);
+
std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
// List of internal blobs shapes.
std::map<std::string, MatShape> outShapes;
}
else if (layer_type == "Split")
{
- DictValue splits = layerParams.get("split");
- const int numSplits = splits.size();
- CV_Assert(numSplits > 1);
+ if (layerParams.has("split"))
+ {
+ DictValue splits = layerParams.get("split");
+ const int numSplits = splits.size();
+ CV_Assert(numSplits > 1);
- std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
- for (int i = 1; i < splits.size() - 1; ++i)
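+ // Slice expects Caffe-style slice points: cumulative sums of the
+ // ONNX split sizes, with the final (implicit) end point omitted.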
+ std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
+ for (int i = 1; i < splits.size() - 1; ++i)
+ {
+ slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i);
+ }
+ layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
+ }
+ else
{
- slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i - 1);
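+ // No explicit "split" attribute: split evenly into one part per output.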
+ layerParams.set("num_split", node_proto.output_size());
}
- layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
layerParams.type = "Slice";
}
- else if (layer_type == "Add" || layer_type == "Sum")
+ else if (layer_type == "Add" || layer_type == "Sum" || layer_type == "Sub")
{
+ bool isSub = layer_type == "Sub";
+ CV_CheckEQ(node_proto.input_size(), 2, "");
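+ // A constant second operand folds into Power (scalar shift) or Scale
+ // (broadcast bias); for Sub the constant is negated first.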
if (layer_id.find(node_proto.input(1)) == layer_id.end())
{
Mat blob = getBlob(node_proto, constBlobs, 1);
blob = blob.reshape(1, 1);
if (blob.total() == 1) {
layerParams.type = "Power";
- layerParams.set("shift", blob.at<float>(0));
+ layerParams.set("shift", (isSub ? -1 : 1) * blob.at<float>(0));
}
else {
layerParams.type = "Scale";
layerParams.set("bias_term", true);
- layerParams.blobs.push_back(blob);
+ layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
}
}
- else {
- layerParams.type = "Eltwise";
- }
- }
- else if (layer_type == "Max")
- {
- layerParams.type = "Eltwise";
- layerParams.set("operation", "max");
- }
- else if (layer_type == "Sub")
- {
- Mat blob = getBlob(node_proto, constBlobs, 1);
- if (blob.total() == 1) {
- layerParams.type = "Power";
- layerParams.set("shift", -blob.at<float>(0));
- }
- else {
- layerParams.type = "Scale";
- layerParams.set("has_bias", true);
- layerParams.blobs.push_back(-1.0f * blob.reshape(1, 1));
- }
- }
- else if (layer_type == "Div")
- {
- if (constBlobs.find(node_proto.input(1)) == constBlobs.end())
+ else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
{
layerParams.type = "Eltwise";
- layerParams.set("operation", "div");
+ if (isSub)
+ {
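+ // Eltwise sum with coefficients {1, -1} computes input(0) - input(1).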
+ static float subCoeffs[] = {1.f, -1.f};
+ layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
+ }
}
else
{
- Mat blob = getBlob(node_proto, constBlobs, 1);
- CV_Assert_N(blob.type() == CV_32F, blob.total());
- if (blob.total() == 1)
- {
- layerParams.set("scale", 1.0f / blob.at<float>(0));
- layerParams.type = "Power";
- }
- else
+ if (isSub)
{
- layerParams.type = "Scale";
- divide(1.0, blob, blob);
- layerParams.blobs.push_back(blob);
- layerParams.set("bias_term", false);
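+ // Shapes differ, so a - b is rewritten as a + (-b): negate input(1)
+ // with a Power layer (scale = -1) and let Scale add it as the bias term.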
+ LayerParams powerParams;
+ powerParams.name = layerParams.name + "/neg";
+ powerParams.type = "Power";
+ powerParams.set("scale", -1);
+
+ //Create Power layer
+ int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
+ //Connect to input
+ layerId = layer_id.find(node_proto.input(1));
+ CV_Assert(layerId != layer_id.end());
+ dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
+ // Register the new layer's id and output shape
+ layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
+ outShapes[powerParams.name] = outShapes[node_proto.input(1)];
+
+ // Re-route the second input through the Power layer
+ node_proto.set_input(1, powerParams.name);
}
+ layerParams.type = "Scale";
+ layerParams.set("bias_term", true);
}
}
+ else if (layer_type == "Max")
+ {
+ layerParams.type = "Eltwise";
+ layerParams.set("operation", "max");
+ }
else if (layer_type == "Neg")
{
layerParams.type = "Power";
layerParams.set("scale", -1);
}
layerParams.set("bias_term", false);
layerParams.set("num_output", layerParams.blobs[0].size[0]);
}
- else if (layer_type == "Mul")
+ else if (layer_type == "Mul" || layer_type == "Div")
{
CV_Assert(node_proto.input_size() == 2);
- if (layer_id.find(node_proto.input(1)) == layer_id.end()) {
- Mat blob = getBlob(node_proto, constBlobs, 1);
+
+ bool isDiv = layer_type == "Div";
+ int constId = -1;
+ bool haveVariables = false;
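+ // Determine which input, if either, is a constant initializer.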
+ for (int i = 0; i < 2; ++i)
+ {
+ if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+ constId = i;
+ else
+ haveVariables = true;
+ }
+ if (constId != -1 && haveVariables)
+ {
+ Mat blob = getBlob(node_proto, constBlobs, constId);
blob = blob.reshape(1, 1);
if (blob.total() == 1) {
- layerParams.set("scale", blob.at<float>(0));
+ float coeff = isDiv ? 1.f / blob.at<float>(0) : blob.at<float>(0);
+ layerParams.set("scale", coeff);
layerParams.type = "Power";
}
else {
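+ // Fold division by a constant tensor into multiplication
+ // by its elementwise reciprocal.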
+ if (isDiv)
+ divide(1.0, blob, blob);
layerParams.blobs.push_back(blob);
layerParams.type = "Scale";
}
}
- else {
+ else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
+ {
layerParams.type = "Eltwise";
- layerParams.set("operation", "prod");
+ layerParams.set("operation", isDiv ? "div" : "prod");
+ }
+ else
+ {
+ if (isDiv)
+ {
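+ // Shapes differ, so a / b is rewritten as a * (1/b): invert input(1)
+ // with a Power layer (power = -1), then multiply via Scale.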
+ LayerParams powerParams;
+ powerParams.name = layerParams.name + "/inv";
+ powerParams.type = "Power";
+ powerParams.set("power", -1);
+
+ //Create Power layer
+ int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
+ //Connect to input
+ layerId = layer_id.find(node_proto.input(1));
+ CV_Assert(layerId != layer_id.end());
+ dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
+ // Register the new layer's id and output shape
+ layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
+ outShapes[powerParams.name] = outShapes[node_proto.input(1)];
+
+ // Re-route the second input through the Power layer
+ node_proto.set_input(1, powerParams.name);
+ }
+ layerParams.type = "Scale";
+ }
+
+ if (!haveVariables)
+ {
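+ // Both operands are constant: evaluate now and register the result
+ // as a constant blob instead of adding a layer.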
+ Mat inp0 = getBlob(node_proto, constBlobs, 0);
+ Mat inp1 = getBlob(node_proto, constBlobs, 1);
+ if (inp0.size != inp1.size)
+ CV_Error(Error::StsNotImplemented, "Constant Mul/Div with inputs of different shapes");
+
+ Mat out;
+ if (isDiv)
+ divide(inp0, inp1, out);
+ else
+ multiply(inp0, inp1, out);
+
+ out = out.reshape(1, inp0.dims, inp0.size);
+ out.dims = inp0.dims; // workaround: cv::Mat has dims >= 2, restore the forced 1-D shape of scalar blobs
+ constBlobs.insert(std::make_pair(layerParams.name, out));
+ continue;
}
}
else if (layer_type == "Conv")
replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
}
replaceLayerParam(layerParams, "mode", "interpolation");
+
+ if (layerParams.get<String>("interpolation") == "linear" && framework_name == "pytorch")
+ {
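+ // PyTorch exports bilinear resize with a scales input holding one factor
+ // per NCHW dimension; entries 2 and 3 are the spatial zoom factors.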
+ layerParams.type = "Resize";
+ Mat scales = getBlob(node_proto, constBlobs, 1);
+ CV_Assert(scales.total() == 4);
+ layerParams.set("interpolation", "opencv_linear");
+ layerParams.set("zoom_factor_y", scales.at<float>(2));
+ layerParams.set("zoom_factor_x", scales.at<float>(3));
+ }
}
else if (layer_type == "LogSoftmax")
{