alignCorners = params.get<bool>("align_corners", false);
halfPixelCenters = params.get<bool>("half_pixel_centers", false);
+ if (interpolation == "opencv_linear")
+ halfPixelCenters = true;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
Mat& inp = inputs[0];
Mat& out = outputs[0];
- if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || interpolation == "opencv_linear" || (interpolation == "bilinear" && halfPixelCenters))
+ int depth = inp.depth();
+ if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || (interpolation == "opencv_linear" && depth != CV_8S) ||
+ (interpolation == "bilinear" && halfPixelCenters && depth != CV_8S))
{
+ // INTER_LINEAR Resize mode does not support INT8 inputs
InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR;
for (size_t n = 0; n < inputs[0].size[0]; ++n)
{
widthOffset = 0.5f * scaleWidth;
}
- for (int y = 0; y < outHeight; ++y)
+ if (depth == CV_8S)
{
- float input_y = y * scaleHeight + heightOffset;
- int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
- y0 = std::min(y0, inpHeight - 1);
+ for (int y = 0; y < outHeight; ++y)
+ {
+ float input_y = y * scaleHeight + heightOffset;
+ int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
+ y0 = std::min(y0, inpHeight - 1);
+
+ const int8_t* inpData_row = inpPlanes.ptr<int8_t>(y0);
+
+ for (int x = 0; x < outWidth; ++x)
+ {
+ float input_x = x * scaleWidth + widthOffset;
+ int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
+ x0 = std::min(x0, inpWidth - 1);
+
+ int8_t* outData = outPlanes.ptr<int8_t>(y, x);
+ const int8_t* inpData_row_c = inpData_row;
- const float* inpData_row = inpPlanes.ptr<float>(y0);
+ for (int c = 0; c < numPlanes; ++c)
+ {
+ *outData = inpData_row_c[x0];
- for (int x = 0; x < outWidth; ++x)
+ inpData_row_c += inpSpatialSize;
+ outData += outSpatialSize;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int y = 0; y < outHeight; ++y)
{
- float input_x = x * scaleWidth + widthOffset;
- int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
- x0 = std::min(x0, inpWidth - 1);
+ float input_y = y * scaleHeight + heightOffset;
+ int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
+ y0 = std::min(y0, inpHeight - 1);
- float* outData = outPlanes.ptr<float>(y, x);
- const float* inpData_row_c = inpData_row;
+ const float* inpData_row = inpPlanes.ptr<float>(y0);
- for (int c = 0; c < numPlanes; ++c)
+ for (int x = 0; x < outWidth; ++x)
{
- *outData = inpData_row_c[x0];
+ float input_x = x * scaleWidth + widthOffset;
+ int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
+ x0 = std::min(x0, inpWidth - 1);
+
+ float* outData = outPlanes.ptr<float>(y, x);
+ const float* inpData_row_c = inpData_row;
- inpData_row_c += inpSpatialSize;
- outData += outSpatialSize;
+ for (int c = 0; c < numPlanes; ++c)
+ {
+ *outData = inpData_row_c[x0];
+
+ inpData_row_c += inpSpatialSize;
+ outData += outSpatialSize;
+ }
}
}
}
}
- else if (interpolation == "bilinear")
+ else if (interpolation == "bilinear" || interpolation == "opencv_linear")
{
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
Mat outPlanes = out.reshape(1, numPlanes * outHeight);
- for (int y = 0; y < outHeight; ++y)
+ if (depth == CV_8S)
+ {
+ for (int y = 0; y < outHeight; ++y)
+ {
+ float input_y = halfPixelCenters ? std::max((y + 0.5f) * scaleHeight - 0.5f, 0.0f) : y * scaleHeight;
+ int y0 = static_cast<int>(input_y);
+ const int8_t* inpData_row0 = inpPlanes.ptr<int8_t>(y0);
+ const int8_t* inpData_row1 = inpPlanes.ptr<int8_t>(std::min(y0 + 1, inpHeight - 1));
+ for (int x = 0; x < outWidth; ++x)
+ {
+ float input_x = halfPixelCenters ? std::max((x + 0.5f) * scaleWidth - 0.5f, 0.0f) : x * scaleWidth;
+ int x0 = static_cast<int>(input_x);
+ int x1 = std::min(x0 + 1, inpWidth - 1);
+
+ int8_t* outData = outPlanes.ptr<int8_t>(y, x);
+ const int8_t* inpData_row0_c = inpData_row0;
+ const int8_t* inpData_row1_c = inpData_row1;
+ for (int c = 0; c < numPlanes; ++c)
+ {
+ *outData = static_cast<int8_t>(inpData_row0_c[x0] +
+ (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
+ (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
+ (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0])));
+
+ inpData_row0_c += inpSpatialSize;
+ inpData_row1_c += inpSpatialSize;
+ outData += outSpatialSize;
+ }
+ }
+ }
+ }
+ else
{
- float input_y = y * scaleHeight;
- int y0 = static_cast<int>(input_y);
- const float* inpData_row0 = inpPlanes.ptr<float>(y0);
- const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
- for (int x = 0; x < outWidth; ++x)
+ for (int y = 0; y < outHeight; ++y)
{
- float input_x = x * scaleWidth;
- int x0 = static_cast<int>(input_x);
- int x1 = std::min(x0 + 1, inpWidth - 1);
-
- float* outData = outPlanes.ptr<float>(y, x);
- const float* inpData_row0_c = inpData_row0;
- const float* inpData_row1_c = inpData_row1;
- for (int c = 0; c < numPlanes; ++c)
+ float input_y = y * scaleHeight;
+ int y0 = static_cast<int>(input_y);
+ const float* inpData_row0 = inpPlanes.ptr<float>(y0);
+ const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
+ for (int x = 0; x < outWidth; ++x)
{
- *outData = inpData_row0_c[x0] +
- (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
- (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
- (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
-
- inpData_row0_c += inpSpatialSize;
- inpData_row1_c += inpSpatialSize;
- outData += outSpatialSize;
+ float input_x = x * scaleWidth;
+ int x0 = static_cast<int>(input_x);
+ int x1 = std::min(x0 + 1, inpWidth - 1);
+
+ float* outData = outPlanes.ptr<float>(y, x);
+ const float* inpData_row0_c = inpData_row0;
+ const float* inpData_row1_c = inpData_row1;
+ for (int c = 0; c < numPlanes; ++c)
+ {
+ *outData = inpData_row0_c[x0] +
+ (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
+ (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
+ (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
+
+ inpData_row0_c += inpSpatialSize;
+ inpData_row1_c += inpSpatialSize;
+ outData += outSpatialSize;
+ }
}
}
}
}
#endif
+ virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
+ const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
+ {
+ return true;
+ }
protected:
int outWidth, outHeight;
void addConstant(const std::string& name, const Mat& blob);
void addLayer(LayerParams& layerParams,
const opencv_onnx::NodeProto& node_proto);
+ void handleQuantizedNode(LayerParams& layerParams,
+ const opencv_onnx::NodeProto& node_proto);
void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto,
const std::string& input, size_t n);
void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQuantDequant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQEltwise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQSigmoid (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQAvgPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+ void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
};
CV_Assert((bool)layer);
std::vector<MatShape> inpShapes(inputs.size());
- int ddepth = CV_32F;
+ int ddepth = params.get<int>("depth", CV_32F);
for (size_t i = 0; i < inputs.size(); ++i)
{
inpShapes[i] = shape(inputs[i]);
void ONNXImporter::addLayer(LayerParams& layerParams,
const opencv_onnx::NodeProto& node_proto)
{
- int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
+ int depth = layerParams.get<int>("depth", CV_32F);
+ int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams);
for (int i = 0; i < node_proto.output_size(); ++i)
{
layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
outShapes.insert(std::make_pair(name, shape(blob)));
}
+void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
+ const opencv_onnx::NodeProto& node_proto)
+{
+ // Quantized nodes have output names ending with 'quantized'
+ std::string outName = node_proto.output(0);
+ int len = outName.length();
+ if (len <= 9)
+ return;
+
+ if (outName.substr(len - 9) == "quantized")
+ {
+ outName = outName.substr(0, len - 9);
+ Mat scale, zeropoint;
+
+ if (constBlobs.find(outName + "scale") != constBlobs.end() &&
+ constBlobs.find(outName + "zero_point") != constBlobs.end())
+ {
+ scale = getBlob(outName + "scale");
+ zeropoint = getBlob(outName + "zero_point");
+ }
+ else
+ {
+ std::string inpName = node_proto.input(0);
+ inpName = inpName.substr(0, inpName.length() - 9);
+ scale = getBlob(inpName + "scale");
+ zeropoint = getBlob(inpName + "zero_point");
+
+ for (int i = 0; i < node_proto.output_size(); i++)
+ {
+ std::string out = node_proto.output(i);
+ out = out.substr(0, out.length() - 9);
+ addConstant(out + "scale", scale);
+ addConstant(out + "zero_point", zeropoint);
+ }
+ }
+
+ if (scale.total() != 1 || zeropoint.total() != 1)
+ CV_Error(Error::StsNotImplemented, "Per-channel scales/zeropoints are not supported");
+
+ layerParams.set("depth", CV_8S);
+ layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
+ layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
+ }
+}
+
void ONNXImporter::populateNet()
{
CV_Assert(model_proto.has_graph());
layerParams.type = layer_type;
layerParams.set("has_dynamic_shapes", hasDynamicShapes);
+ handleQuantizedNode(layerParams, node_proto);
+
DispatchMap::const_iterator iter = dispatch.find(layer_type);
if (iter != dispatch.end())
{
void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
- layerParams.type = "Pooling";
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type = (depth == CV_8S) ? "PoolingInt8" : "Pooling";
layerParams.set("pool", "MAX");
layerParams.set("ceil_mode", layerParams.has("pad_mode"));
addLayer(layerParams, node_proto);
{
layerParams.set("num_split", node_proto.output_size());
}
- layerParams.type = "Slice";
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type = (depth == CV_8S) ? "SliceInt8" : "Slice";
addLayer(layerParams, node_proto);
}
void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
- layerParams.type = "Permute";
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type = (depth == CV_8S) ? "PermuteInt8" : "Permute";
replaceLayerParam(layerParams, "perm", "order");
CV_Assert(node_proto.input_size() == 1);
addConstant(layerParams.name, out);
return;
}
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type += (depth == CV_8S) ? "Int8" : "";
addLayer(layerParams, node_proto);
}
if (axes.size() != 1)
CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");
+ int depth = layerParams.get<int>("depth", CV_32F);
+
MatShape inpShape = outShapes[node_proto.input(0)];
int axis = axes.getIntValue(0);
CV_Assert(0 <= axis && axis <= inpShape.size());
std::vector<int> outShape = inpShape;
outShape.insert(outShape.begin() + axis, 1);
- layerParams.type = "Reshape";
+ layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape";
layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
if (hasDynamicShapes)
{
void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type += (depth == CV_8S) ? "Int8" : "";
if (node_proto.input_size() == 2) {
Mat blob = getBlob(node_proto, 1);
void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
- layerParams.type = "Padding";
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type = (depth == CV_8S) ? "PaddingInt8" : "Padding";
replaceLayerParam(layerParams, "mode", "type");
if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
{
if (node_proto.input_size() == 3)
{
Mat value = getBlob(node_proto, 2);
- layerParams.set("value", value.ptr<float>()[0]);
+ float padValue = (depth == CV_8S) ? (float)value.ptr<int8_t>()[0] : value.ptr<float>()[0];
+ layerParams.set("value", padValue);
}
}
addLayer(layerParams, node_proto);
for (int i = 1; i < node_proto.input_size(); i++)
CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
+ int depth = layerParams.get<int>("depth", CV_32F);
+ layerParams.type += (depth == CV_8S) ? "Int8" : "";
+
if (layerParams.has("coordinate_transformation_mode"))
{
String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
addLayer(layerParams, node_proto);
}
+void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+ CV_Assert(node_proto.input_size() == 3);
+ layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
+
+ if (node_proto.op_type() == "DequantizeLinear")
+ {
+ Mat scale = getBlob(node_proto, 1);
+ Mat zeropoint = getBlob(node_proto, 2);
+
+ layerParams.set("scales", DictValue::arrayReal(scale.ptr<float>(), 1));
+ layerParams.set("zeropoints", DictValue::arrayInt(zeropoint.ptr<int8_t>(), 1));
+ }
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+ int ninputs = node_proto.input_size();
+ CV_Assert(ninputs == 8 || ninputs == 9);
+
+ Mat inp_sc = getBlob(node_proto, 1);
+ Mat inp_zp = getBlob(node_proto, 2);
+
+ Mat weights = getBlob(node_proto, 3);
+ int outCn = weights.size[0];
+ Mat w_scale = getBlob(node_proto, 4);
+ CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
+ Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
+
+ Mat out_sc = getBlob(node_proto, 6);
+ Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
+
+ Mat weights_2d = weights.reshape(1, outCn);
+ Mat biasFused(1, outCn, CV_32S);
+ Mat outputMultiplier(1, outCn, CV_32F);
+ for (int i = 0; i < outCn; i++)
+ {
+ biasFused.at<int>(i) = bias.at<int>(i) - inp_zp.at<int8_t>(0)*(cv::sum(weights_2d.row(i))[0]);
+ outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
+ }
+
+ layerParams.type = "ConvolutionInt8";
+ layerParams.set("num_output", outCn);
+ layerParams.set("input_zeropoint", inp_zp.at<int8_t>(0));
+ layerParams.blobs.push_back(weights);
+ layerParams.blobs.push_back(biasFused);
+ layerParams.blobs.push_back(outputMultiplier);
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+ int ninputs = node_proto.input_size();
+ CV_Assert(ninputs == 8);
+
+ if (constBlobs.find(node_proto.input(3)) == constBlobs.end())
+ CV_Error(Error::StsNotImplemented, "Variable weights is not supported");
+
+ int firstInpDims = outShapes[node_proto.input(0)].size();
+
+ Mat inp_sc = getBlob(node_proto, 1);
+ Mat inp_zp = getBlob(node_proto, 2);
+
+ Mat weights = getBlob(node_proto, 3).t();
+ int outCn = weights.size[0];
+ int secondInpDims = weights.dims;
+
+ Mat w_scale = getBlob(node_proto, 4);
+ CV_Assert(w_scale.total() == 1 || w_scale.total() == outCn);
+ Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
+ Mat out_sc = getBlob(node_proto, 6);
+
+ Mat bias(1, outCn, CV_32S);
+ Mat outputMultiplier(1, outCn, CV_32F);
+ for (int i = 0; i < outCn; i++)
+ {
+ bias.at<int>(i) = -inp_zp.at<int8_t>(0)*(cv::sum(weights.row(i))[0]);
+ outputMultiplier.at<float>(i) = (inp_sc.at<float>(0) * wt_sc.at<float>(i)) / out_sc.at<float>(0);
+ }
+
+ layerParams.type = "InnerProductInt8";
+ layerParams.set("num_output", outCn);
+ layerParams.set("axis", firstInpDims - secondInpDims + 1);
+ layerParams.blobs.push_back(weights);
+ layerParams.blobs.push_back(bias);
+ layerParams.blobs.push_back(outputMultiplier);
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+{
+ opencv_onnx::NodeProto node_proto = node_proto_;
+ CV_Assert(node_proto.input_size() == 8);
+ std::string op = (node_proto.op_type() == "QLinearAdd") ? "sum" : "prod";
+ int constId = -1;
+ for (int i = 0; i < 4; i += 3)
+ {
+ if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+ constId = i;
+ }
+
+ Mat inp_0_sc = getBlob(node_proto, 1);
+ Mat inp_0_zp = getBlob(node_proto, 2);
+
+ Mat inp_1_sc = getBlob(node_proto, 4);
+ Mat inp_1_zp = getBlob(node_proto, 5);
+
+ // Set 2nd input as the const input
+ if (constId == 0)
+ {
+ cv::swap(inp_0_sc, inp_1_sc);
+ cv::swap(inp_0_zp, inp_1_zp);
+ }
+
+ float out_sc = getBlob(node_proto, 6).at<float>(0);
+ int8_t out_zp = getBlob(node_proto, 7).at<int8_t>(0);
+
+ std::vector<float> inp_scales = {inp_0_sc.at<float>(0), inp_1_sc.at<float>(0)};
+ std::vector<int8_t> inp_zps = {inp_0_zp.at<int8_t>(0), inp_1_zp.at<int8_t>(0)};
+
+ std::vector<float> coeffs;
+ float offset;
+ if (op == "sum")
+ {
+ coeffs = {inp_scales[0]/out_sc, inp_scales[1]/out_sc};
+ offset = out_zp - coeffs[0]*inp_zps[0] - coeffs[1]*inp_zps[1];
+ }
+ else
+ {
+ coeffs = {inp_scales[0]/out_sc, inp_scales[1]};
+ offset = out_zp;
+ }
+
+ if (constId != -1)
+ {
+ Mat blob = getBlob(node_proto, constId);
+ if (blob.total() == 1)
+ {
+ float val = inp_scales[1] * (blob.at<int8_t>(0) - inp_zps[1]);
+ float scale = inp_scales[0] / out_sc;
+ if (op == "prod")
+ scale *= val;
+
+ float shift = out_zp - scale*inp_zps[0];
+ if (op == "sum")
+ shift += (val/out_sc);
+
+ LayerParams rescaleParams;
+ rescaleParams.name = layerParams.name;
+ rescaleParams.type = "Requantize";
+ rescaleParams.set("depth", CV_8S);
+ rescaleParams.set("scale", scale);
+ rescaleParams.set("shift", shift);
+ addLayer(rescaleParams, node_proto);
+ return;
+ }
+ else
+ {
+ MatShape inpShape = outShapes[node_proto.input(3 - constId)];
+ if (blob.dims == 2)
+ blob = blob.t();
+
+ if (shape(blob) == inpShape)
+ {
+ LayerParams constParams;
+ constParams.name = layerParams.name + "/const";
+ constParams.type = "ConstInt8";
+ constParams.set("depth", CV_8S);
+ constParams.set("scales", DictValue::arrayReal(inp_1_sc.ptr<float>(), 1));
+ constParams.set("zeropoints", DictValue::arrayInt(inp_1_zp.ptr<int8_t>(), 1));
+ constParams.blobs.push_back(blob);
+
+ int id = dstNet.addLayer(constParams.name, constParams.type, CV_8S, constParams);
+ layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
+ outShapes[constParams.name] = shape(blob);
+ node_proto.set_input(constId, constParams.name);
+
+ layerParams.type = "EltwiseInt8";
+ layerParams.set("operation", op);
+ layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
+ layerParams.set("offset", offset);
+ }
+ else
+ {
+ layerParams.type = "ScaleInt8";
+ layerParams.set("bias_term", op == "sum");
+ int axis = 1;
+ for (int i = 0; i < graph_proto.initializer_size(); i++)
+ {
+ opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
+ if (tensor_proto.name() == node_proto.input(constId))
+ {
+ axis = inpShape.size() - tensor_proto.dims_size();
+ break;
+ }
+ }
+ layerParams.set("axis", axis);
+ blob = blob.reshape(1, 1);
+ Mat blob_dequantized;
+ blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1]));
+ layerParams.blobs.push_back(blob_dequantized);
+ layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
+ }
+ }
+ }
+ else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(3)])
+ {
+ layerParams.type = "EltwiseInt8";
+ layerParams.set("operation", op);
+ layerParams.set("coeff", DictValue::arrayReal(coeffs.data(), coeffs.size()));
+ layerParams.set("offset", offset);
+ }
+ else
+ {
+ layerParams.type = "ScaleInt8";
+ layerParams.set("bias_term", op == "sum");
+ layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size()));
+ }
+
+ layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size()));
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+ CV_Assert(node_proto.input_size() == 5);
+
+ float slope = layerParams.get<float>("alpha");
+ float inp_sc = getBlob(node_proto, 1).at<float>(0);
+ int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
+ float out_sc = getBlob(node_proto, 3).at<float>(0);
+ int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
+
+ Mat lookUpTable(1, 256, CV_8S);
+ int8_t* table = lookUpTable.ptr<int8_t>();
+ for (int i = -128; i < 128; i++)
+ {
+ float x = inp_sc*(i - inp_zp);
+ float y = x >= 0.f ? x : slope*x;
+ int quantized = out_zp + cvRound(y/out_sc);
+ table[i+128] = saturate_cast<int8_t>(quantized);
+ }
+
+ layerParams.type = "ReLUInt8";
+ layerParams.blobs.push_back(lookUpTable);
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+ CV_Assert(node_proto.input_size() == 5);
+
+ float inp_sc = getBlob(node_proto, 1).at<float>(0);
+ int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
+ float out_sc = getBlob(node_proto, 3).at<float>(0);
+ int8_t out_zp = getBlob(node_proto, 4).at<int8_t>(0);
+
+ Mat lookUpTable(1, 256, CV_8S);
+ int8_t* table = lookUpTable.ptr<int8_t>();
+ for (int i = -128; i < 128; i++)
+ {
+ float x = inp_sc*(i - inp_zp);
+ float y = 1.f/(1.f + std::exp(-x));
+ int quantized = out_zp + cvRound(y/out_sc);
+ table[i+128] = saturate_cast<int8_t>(quantized);
+ }
+
+ layerParams.type = "SigmoidInt8";
+ layerParams.blobs.push_back(lookUpTable);
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
+{
+ CV_Assert(node_proto.input_size() == 5);
+ float inp_sc = getBlob(node_proto, 1).at<float>(0);
+ int8_t inp_zp = getBlob(node_proto, 2).at<int8_t>(0);
+ float out_sc = getBlob(node_proto, 3).at<float>(0);
+
+ layerParams.type = "PoolingInt8";
+ layerParams.set("pool", "ave");
+ layerParams.set("global_pooling", node_proto.op_type() == "QLinearGlobalAveragePool");
+ layerParams.set("multiplier", inp_sc/out_sc);
+ layerParams.set("input_zeropoint", inp_zp);
+ addLayer(layerParams, node_proto);
+}
+
+void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+{
+ opencv_onnx::NodeProto node_proto = node_proto_;
+ layerParams.type = "ConcatInt8";
+ int num_inputs = node_proto.input_size();
+
+ float out_scale = getBlob(node_proto, 0).at<float>(0);
+ int out_zp = getBlob(node_proto, 1).at<int8_t>(0);
+
+ for (int i = 2; i < num_inputs; i += 3)
+ {
+ float inp_scale = getBlob(node_proto, i + 1).at<float>(0);
+ int inp_zp = getBlob(node_proto, i + 2).at<int8_t>(0);
+
+ if (inp_scale != out_scale || inp_zp != out_zp)
+ {
+ float scale = inp_scale/out_scale;
+ float shift = out_zp - scale*inp_zp;
+
+ if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+ {
+ Mat blob = getBlob(node_proto, i);
+ Mat blob_rescaled;
+ blob.convertTo(blob_rescaled, CV_8S, scale, shift);
+ constBlobs[node_proto.input(i)] = blob_rescaled;
+ }
+ else
+ {
+ LayerParams rescaleParams;
+ rescaleParams.name = node_proto.input(i) + "/rescale";
+ rescaleParams.type = "Requantize";
+ rescaleParams.set("depth", CV_8S);
+ rescaleParams.set("scale", scale);
+ rescaleParams.set("shift", shift);
+
+ opencv_onnx::NodeProto proto;
+ proto.add_input(node_proto.input(i));
+ proto.add_output(rescaleParams.name);
+ addLayer(rescaleParams, proto);
+ node_proto.set_input(i, rescaleParams.name);
+ }
+ }
+ }
+
+ bool hasVariableInps = false;
+ for (int i = 2; i < num_inputs; i += 3)
+ {
+ if (layer_id.find(node_proto.input(i)) != layer_id.end())
+ {
+ hasVariableInps = true;
+ break;
+ }
+ }
+
+ if (!hasVariableInps)
+ {
+ std::vector<Mat> inputs, concatenated;
+ MatShape inputShape;
+ for (size_t i = 2; i < num_inputs; i += 3)
+ {
+ Mat blob = getBlob(node_proto, i);
+ if (blob.size.dims() > inputShape.size())
+ {
+ inputShape = shape(blob);
+ }
+ inputs.push_back(blob);
+ }
+
+ int axis = layerParams.get<int>("axis", 1);
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ MatShape targetShape = inputShape;
+ targetShape[axis] = shape(inputs[i])[axis];
+ CV_CheckEQ(total(targetShape), total(shape(inputs[i])), "");
+ inputs[i] = inputs[i].reshape(0, targetShape);
+ }
+ runLayer(layerParams, inputs, concatenated);
+ CV_Assert(concatenated.size() == 1);
+ addConstant(layerParams.name, concatenated[0]);
+ return;
+ }
+ else
+ {
+ for (int i = 2; i < num_inputs; i += 3)
+ {
+ if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
+ {
+ LayerParams constParams;
+ constParams.name = node_proto.input(i);
+ constParams.type = "ConstInt8";
+ constParams.blobs.push_back(getBlob(node_proto, i));
+ constParams.set("depth", CV_8S);
+
+ opencv_onnx::NodeProto proto;
+ proto.add_output(constParams.name);
+ addLayer(constParams, proto);
+ }
+ }
+ }
+ addLayer(layerParams, node_proto);
+}
+
const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap()
{
DispatchMap dispatch;
dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax;
dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput;
dispatch["CumSum"] = &ONNXImporter::parseCumSum;
+ dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant;
+ dispatch["QLinearConv"] = &ONNXImporter::parseQConv;
+ dispatch["QLinearMatMul"] = &ONNXImporter::parseQMatMul;
+ dispatch["QLinearAdd"] = dispatch["QLinearMul"] = &ONNXImporter::parseQEltwise;
+ dispatch["QLinearLeakyRelu"] = &ONNXImporter::parseQLeakyRelu;
+ dispatch["QLinearSigmoid"] = &ONNXImporter::parseQSigmoid;
+ dispatch["QLinearAveragePool"] = dispatch["QLinearGlobalAveragePool"] = &ONNXImporter::parseQAvgPool;
+ dispatch["QLinearConcat"] = &ONNXImporter::parseQConcat;
return dispatch;
}