namespace dnn
{
+// Broadcasts per-axis quantization data (scales or zero points) in place to targetShape.
+//
+// data        - in/out. On entry it must contain exactly targetShape[axis] elements.
+//               Assumes it already has as many dimensions as targetShape, with size 1 on
+//               every axis except `axis` (this is how broadcastScaleAndZeropoint creates it).
+//               On return it has shape targetShape.
+// targetShape - shape to broadcast to.
+// axis        - non-negative index of the axis the values in `data` run along.
+static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int axis)
+{
+    const int ndims = static_cast<int>(targetShape.size());
+
+    // The data is the 1-D scales or zeropoints.
+    CV_Assert(axis >= 0 && ndims > axis && data.total() == static_cast<size_t>(targetShape[axis]));
+
+    // Shape reached so far; grows towards targetShape one axis per iteration.
+    MatShape subTargetShape = shape(data);
+
+    for (int i = 0; i < ndims; i++)
+    {
+        if (i == axis)
+            continue; // this axis already has its final extent
+
+        subTargetShape[i] = targetShape[i];
+
+        // Flatten to a 2-D matrix with total(data, 0, i) rows, tile it horizontally
+        // targetShape[i] times, then reinterpret the result with the enlarged N-D shape.
+        data = data.reshape(0, total(data, 0, i));
+        Mat tmp = cv::repeat(data, 1, subTargetShape[i]);
+        data = tmp.reshape(0, subTargetShape);
+    }
+}
+
+// Builds scale and zero-point tensors of shape targetShape from per-axis values.
+//
+// scalesMat, zeropointsMat - outputs; both are (re)created as CV_32FC1 and end up with shape targetShape.
+// scales                   - one scale per index along `axis`.
+// zeropoints               - one zero point per scale; stored as float in zeropointsMat.
+// targetShape              - shape to broadcast to.
+// axis                     - index into targetShape of the quantization axis; must be in range.
+static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector<float>& scales,
+                                       const std::vector<int>& zeropoints, const MatShape& targetShape, int axis)
+{
+    const int len = static_cast<int>(scales.size());
+
+    // Guard the indexed accesses below: subTargetShape[axis] and zeropoints[i] for i < len.
+    CV_Assert(axis >= 0 && axis < static_cast<int>(targetShape.size()));
+    CV_Assert(zeropoints.size() >= scales.size());
+
+    // Start from a shape that is 1 everywhere except along the quantization axis.
+    MatShape subTargetShape(targetShape.size(), 1);
+    subTargetShape[axis] = len;
+
+    zeropointsMat.create(static_cast<int>(subTargetShape.size()), subTargetShape.data(), CV_32FC1);
+    scalesMat.create(static_cast<int>(subTargetShape.size()), subTargetShape.data(), CV_32FC1);
+
+    // Deep copy the scales and zeropoint data and prevent the original data from being changed.
+    float* scalePtr = scalesMat.ptr<float>(0);
+    float* zpPtr = zeropointsMat.ptr<float>(0);
+    for (int i = 0; i < len; i++)
+    {
+        scalePtr[i] = scales[i];
+        zpPtr[i] = static_cast<float>(zeropoints[i]);
+    }
+
+    // Broadcast the scales and zero points to the input shape.
+    broadcast1D2TargetMat(scalesMat, targetShape, axis);
+    broadcast1D2TargetMat(zeropointsMat, targetShape, axis);
+}
+
// Quantize FP32/FP16 Inputs to INT8
class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
{
public:
+ int axis;
+ bool is1D;
+ Mat scalesMat, zeropointsMat; // Holds the broadcast scales and zero-point data.
+
QuantizeLayerImpl(const LayerParams& params)
{
- scale = params.get<float>("scales", 1.0f);
- zeropoint = params.get<int>("zeropoints", 0);
+    // Reads the quantization parameters from `params`:
+    //   "is1D"       - false (default): one scalar scale / zero point;
+    //                  true: "scales" and "zeropoints" are arrays.
+    //   "axis"       - axis index, default 1.
+    //   "scales"     - scalar (default 1.0f) or non-empty array.
+    //   "zeropoints" - scalar (default 0) or array no longer than "scales";
+    //                  entries that are not supplied stay 0.
+    is1D = params.get<bool>("is1D", false);
+    axis = params.get<int>("axis", 1);
+    if (!is1D)
+    {
+        scales.push_back(params.get<float>("scales", 1.0f));
+        zeropoints.push_back(params.get<int>("zeropoints", 0));
+    }
+    else
+    {
+        const DictValue paramScales = params.get("scales");
+        const int n = paramScales.size();
+
+        CV_Assert(n > 0);
+        scales.resize(n, 0.);
+        for (int i = 0; i < n; i++)
+            scales[i] = paramScales.get<float>(i);
+
+        const DictValue paramZp = params.get("zeropoints");
+        const int numZp = paramZp.size();
+
+        // zeropoints holds n entries; more zero points than scales would write past its end.
+        CV_Assert(numZp <= n);
+        zeropoints.resize(n, 0);
+        for (int i = 0; i < numZp; i++)
+            zeropoints[i] = paramZp.get<int>(i);
+    }
setParamsFrom(params);
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
+
+ axis = normalize_axis(axis, shape(inputs[0]).size());
+
+ if (is1D)
+ {
+ MatShape inputShape = shape(inputs[0]);
+ broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
+ }
}
#ifdef HAVE_OPENCL
inputs[0] = inputFp32; // replace
}
- inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint);
+ inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
return true;
}
#endif
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !is1D,
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
- inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint);
+ if (outputs[0].depth() != CV_8S)
+ outputs[0].convertTo(outputs[0], CV_8S);
+
+ if (is1D)
+ {
+ Mat inputTmp;
+ divide(inputs[0], scalesMat, inputTmp);
+ subtract(inputTmp, zeropointsMat, inputTmp);
+
+ inputTmp.convertTo(outputs[0], CV_8S);
+ }
+ else
+ inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
}
};
class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
{
public:
+ int axis;
+ bool is1D;
+ Mat scalesMat, zeropointsMat; // Holds the broadcast scales and zero-point data.
+
DequantizeLayerImpl(const LayerParams& params)
{
- scale = params.get<float>("scales", 1.0f);
- zeropoint = params.get<int>("zeropoints", 0);
+    // Reads the dequantization parameters from `params`:
+    //   "is1D"       - false (default): one scalar scale / zero point;
+    //                  true: "scales" and "zeropoints" are arrays.
+    //   "axis"       - axis index, default 1.
+    //   "scales"     - scalar (default 1.0f) or non-empty array.
+    //   "zeropoints" - scalar (default 0) or array no longer than "scales";
+    //                  entries that are not supplied stay 0.
+    is1D = params.get<bool>("is1D", false);
+    axis = params.get<int>("axis", 1);
+
+    if (!is1D)
+    {
+        scales.push_back(params.get<float>("scales", 1.0f));
+        zeropoints.push_back(params.get<int>("zeropoints", 0));
+    }
+    else
+    {
+        const DictValue paramScales = params.get("scales");
+        const int n = paramScales.size();
+
+        CV_Assert(n > 0);
+        scales.resize(n);
+        for (int i = 0; i < n; i++)
+            scales[i] = paramScales.get<float>(i);
+
+        const DictValue paramZp = params.get("zeropoints");
+        const int numZp = paramZp.size();
+
+        // zeropoints holds n entries; more zero points than scales would write past its end.
+        CV_Assert(numZp <= n);
+        zeropoints.resize(n, 0);
+        for (int i = 0; i < numZp; i++)
+            zeropoints[i] = paramZp.get<int>(i);
+    }
+
setParamsFrom(params);
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
+
+ axis = normalize_axis(axis, shape(inputs[0]).size());
+
+ if (is1D)
+ {
+ MatShape inputShape = shape(inputs[0]);
+ broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
+ }
}
#ifdef HAVE_OPENCL
outputs_.getUMatVector(outputs);
UMat outputFp32;
- inputs[0].convertTo(outputFp32, CV_32F, scale, -(scale*zeropoint));
+ inputs[0].convertTo(outputFp32, CV_32F, scales[0], -(scales[0]*zeropoints[0]));
if (outputs_.depth() == CV_16S)
convertFp16(outputFp32, outputs[0]);
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
- CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
+ CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !is1D,
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
- inputs[0].convertTo(outputs[0], CV_32F, scale, -(scale*zeropoint));
+ if (outputs[0].depth() != CV_32F)
+ outputs[0].convertTo(outputs[0], CV_32F);
+
+ if (is1D)
+ {
+ Mat inputTmp;
+ inputs[0].convertTo(inputTmp, CV_32F);
+ subtract(inputTmp, zeropointsMat, inputTmp);
+ multiply(inputTmp, scalesMat, outputs[0]);
+ }
+ else
+ inputs[0].convertTo(outputs[0], CV_32F, scales[0], -(scales[0]*zeropoints[0]));
}
};
class ONNXLayerHandler;
template <typename T>
-static T getScaleFromMat(Mat m)
+static T getScalarFromMat(Mat m)
{
CV_Assert(m.total() == 1);
return m.at<T>(0);
inpShapes[i] = shape(inputs[i]);
if (i > 0 && ddepth != inputs[i].depth())
CV_Error(Error::StsNotImplemented, "Mixed input data types.");
- ddepth = inputs[i].depth();
+
+ // Quantize and Dequantize layer have different output type than input.
+ if (params.type != "Quantize" && params.type != "Dequantize")
+ ddepth = inputs[i].depth();
}
std::vector<MatShape> outShapes, internalShapes;
{
CV_Assert(node_proto.input_size() == 2 || node_proto.input_size() == 3);
layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
+ int axis = layerParams.get<int>("axis", 1);
+ // For QuantizeLinear and DequantizeLinear, the scale and zeropoint can be a Scalar (per-tensor quantized)
+ // or 1-D tensor (per-channel quantized).
+ bool is1D = false;
+
+ Mat scaleMat = getBlob(node_proto, 1);
+ if(scaleMat.total() > 1) is1D = true;
- float scale = getScaleFromMat<float>(getBlob(node_proto, 1));
- int zeropoint = 0;
+ Mat zpMat;
if (node_proto.input_size() == 3)
- zeropoint = (int)getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+ {
+ zpMat = getBlob(node_proto, 2);
+ CV_Assert(zpMat.total() == scaleMat.total()); // The zero point must have as many elements as the scale.
+ }
+
+ if (is1D)
+ {
+ const int num = scaleMat.total();
- layerParams.set("scales", scale);
- layerParams.set("zeropoints", zeropoint);
+ std::vector<int> zeropoints(num, 0);
+ std::vector<float> scales(num, 0);
+
+ for (int i = 0; i < num; i++)
+ {
+ scales[i] = scaleMat.at<float>(i);
+ if (!zpMat.empty())
+ zeropoints[i] = zpMat.depth() == CV_32S ?
+ zpMat.at<int>(i) : (int)zpMat.at<int8_t>(i);
+ }
+
+ layerParams.set("is1D", true);
+ layerParams.set("axis", axis);
+ layerParams.set("scales", DictValue::arrayReal(scales.data(), scales.size()));
+ layerParams.set("zeropoints", DictValue::arrayInt(zeropoints.data(), zeropoints.size()));
+ }
+ else
+ {
+ int zeropoint = zpMat.empty() ? 0 : zpMat.depth() == CV_32S ?
+ getScalarFromMat<int>(zpMat) : (int)getScalarFromMat<int8_t>(zpMat);
+ float scale = getScalarFromMat<float>(scaleMat);
+
+ layerParams.set("is1D", false);
+ layerParams.set("scales", scale);
+ layerParams.set("zeropoints", zeropoint);
+ }
if (layerParams.type == "Quantize")
layerParams.set("depth", CV_8S);
else // Dequantize
layerParams.set("depth", CV_32F);
- addLayer(layerParams, node_proto);
+ if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Constant input: run the layer now and store the result as a constant.
+ {
+ std::vector<Mat> inputs, outputs;
+ inputs.push_back(getBlob(node_proto, 0));
+
+ runLayer(layerParams, inputs, outputs);
+ addConstant(node_proto.output(0), outputs[0]);
+ }
+ else
+ addLayer(layerParams, node_proto);
}
void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
int ninputs = node_proto.input_size();
CV_Assert(ninputs == 8 || ninputs == 9);
- float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int inp_zp = (int)getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+ float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int inp_zp = (int)getScalarFromMat<int8_t>(getBlob(node_proto, 2));
if (layerParams.has("pad"))
{
bool per_channel = w_scale.total() == outCn;
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
- int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
+ int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);
int firstInpDims = outShapes[node_proto.input(0)].size();
- float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+ float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
Mat weights = getBlob(node_proto, 3).t();
int outCn = weights.size[0];
bool per_channel = w_scale.total() == outCn ? true : false;
Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
- int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
+ int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
Mat bias(1, outCn, CV_32S);
Mat outputMultiplier(1, outCn, CV_32F);
int firstInpDims = outShapes[node_proto.input(0)].size();
- float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+ float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
int outCn = weights.size[0];
int secondInpDims = weights.dims;
CV_Error(Error::StsUnsupportedFormat, "The zero-point non-zero case of W is not supported!");
}
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 7));
- int8_t out_zp = ninputs == 9 ? getScaleFromMat<int8_t>(getBlob(node_proto, 8)) : 0;
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 7));
+ int8_t out_zp = ninputs == 9 ? getScalarFromMat<int8_t>(getBlob(node_proto, 8)) : 0;
Mat bias;
if (constBlobs.find(node_proto.input(6)) != constBlobs.end())
constId = i;
}
- float inp_0_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int8_t inp_0_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+ float inp_0_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int8_t inp_0_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
- float inp_1_sc = getScaleFromMat<float>(getBlob(node_proto, 4));
- int8_t inp_1_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 5));
+ float inp_1_sc = getScalarFromMat<float>(getBlob(node_proto, 4));
+ int8_t inp_1_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 5));
// Set 2nd input as the const input
if (constId == 0)
cv::swap(inp_0_zp, inp_1_zp);
}
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
int8_t out_zp = 0;
if (node_proto.input_size() == 8)
- out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
+ out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));
std::vector<float> inp_scales = {inp_0_sc, inp_1_sc};
std::vector<int8_t> inp_zps = {inp_0_zp, inp_1_zp};
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
float slope = layerParams.get<float>("alpha");
- float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
- int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
+ float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
+ int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
Mat lookUpTable(1, 256, CV_8S);
int8_t* table = lookUpTable.ptr<int8_t>();
{
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
- float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
- int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
+ float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
+ int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
Mat lookUpTable(1, 256, CV_8S);
int8_t* table = lookUpTable.ptr<int8_t>();
{
CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);
- float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
- int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
- float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
- int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
+ float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+ int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
+ float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
+ int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));
layerParams.type = "PoolingInt8";
layerParams.set("pool", "ave");
layerParams.type = "ConcatInt8";
int num_inputs = node_proto.input_size();
- float out_scale = getScaleFromMat<float>(getBlob(node_proto, 0));
- int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 1));
+ float out_scale = getScalarFromMat<float>(getBlob(node_proto, 0));
+ int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 1));
for (int i = 2; i < num_inputs; i += 3)
{
- float inp_scale = getScaleFromMat<float>(getBlob(node_proto, i + 1));
- int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, i + 2));
+ float inp_scale = getScalarFromMat<float>(getBlob(node_proto, i + 1));
+ int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, i + 2));
if (inp_scale != out_scale || inp_zp != out_zp)
{