/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "QuantizationUtils.h"
28 bool is_quantized(const CircleNode *node)
30 return node->quantparam() != nullptr &&
31 (node->dtype() == loco::DataType::U8 || // activation, weight (uint8 quant)
32 node->dtype() == loco::DataType::S16 || // activation, weight (int16 quant)
33 node->dtype() == loco::DataType::S32 || // bias (uint8 quant)
34 node->dtype() == loco::DataType::S64); // bias (int16 quant)
bool is_fp32(const CircleNode *node) { return node->dtype() == loco::DataType::FLOAT32; }
uint8_t fp32_to_uint8_cast(float f)
{
  assert(std::numeric_limits<uint8_t>::min() <= f);
  assert(f <= std::numeric_limits<uint8_t>::max());
  return static_cast<uint8_t>(f);
}
void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                             float &scaling_factor, int64_t &zp, float &nudged_min,
                                             float &nudged_max)
{
  const int32_t kMinScale = 0;
  const int32_t kMaxScale = 255;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
  const float scaling_factor_inv = 1.0 / scaling_factor;
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    // clip to the nudged range, then map to an integer step
    auto data = node->at<loco::DataType::FLOAT32>(i);
    data = data < nudged_min ? nudged_min : data;
    data = data > nudged_max ? nudged_max : data;
    quantized_values[i] =
      static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
  }

  node->dtype(loco::DataType::U8);      // change the type of tensor
  node->size<loco::DataType::U8>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
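// Worked example (illustrative values, not from the original source):
// with min = -1.0 and max = 3.0, compute_asym_scale_zp (defined below) yields
// scale = 4/255 ~= 0.015686, zp = 64, nudged_min ~= -1.00392.
// A weight of 1.0 is then quantized as
//   round((1.0 - (-1.00392)) * (255/4)) = round(127.75) = 128.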
void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                            float &scaling_factor, float &nudged_min,
                                            float &nudged_max)
{
  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
  const int32_t kMinScale = -kMaxScale;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
  const float scaling_factor_inv = 1.0 / scaling_factor;
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    // clip to the nudged range, then map to an integer step
    auto data = node->at<loco::DataType::FLOAT32>(i);
    data = data < nudged_min ? nudged_min : data;
    data = data > nudged_max ? nudged_max : data;
    quantized_values[i] = static_cast<int32_t>(std::round(data * scaling_factor_inv));
  }

  node->dtype(loco::DataType::S16);      // change the type of tensor
  node->size<loco::DataType::S16>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::S16>(i) =
      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
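// Worked example (illustrative values, not from the original source):
// with min = -0.5 and max = 2.0, compute_sym_scale (defined below) yields
// scale = 2.0/32767 ~= 6.1037e-5, so a weight of 1.0 maps to
//   round(1.0 * 32767/2.0) = round(16383.5) = 16384,
// and -0.5 maps to round(-8191.75) = -8192.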
void compute_sym_scale(float min, float max, float &scaling_factor, float &nudged_min,
                       float &nudged_max, loco::DataType out_type)
{
  assert(min <= max);
  assert(out_type == loco::DataType::S8 || out_type == loco::DataType::S16);

  const int32_t kMaxScale = (out_type == loco::DataType::S16) ? std::numeric_limits<int16_t>::max()
                                                              : std::numeric_limits<int8_t>::max();
  const int32_t kMinScale = -kMaxScale;
  const double qmin_double = kMinScale;
  const double qmax_double = kMaxScale;
  const double rmin = std::fmin(0, min);
  const double rmax = std::fmax(0, max);
  double scale_factor_from_min_side{0};
  double scale_factor_from_max_side{0};

  if ((qmin_double * rmin) > 0)
    scale_factor_from_min_side = rmin / qmin_double;

  if ((qmax_double * rmax) > 0)
    scale_factor_from_max_side = rmax / qmax_double;

  scaling_factor = scale_factor_from_min_side > scale_factor_from_max_side
                     ? scale_factor_from_min_side
                     : scale_factor_from_max_side;

  // protect scale from being very low to avoid overflow/underflow
  const float kMinScalingFactor = (out_type == loco::DataType::S16) ? 1e-8 : 1e-5;
  scaling_factor = std::max(scaling_factor, kMinScalingFactor);

  nudged_min = static_cast<float>(qmin_double * scaling_factor);
  nudged_max = static_cast<float>(qmax_double * scaling_factor);
}
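// Worked example (illustrative values, not from the original source):
// for S16 with min = -0.5, max = 2.0:
//   scale_factor_from_min_side = -0.5 / -32767 ~= 1.5259e-5
//   scale_factor_from_max_side =  2.0 /  32767 ~= 6.1037e-5
// The larger one wins, so scale ~= 6.1037e-5 and the nudged range becomes
// [-2.0, 2.0]; the side with the smaller magnitude gives up precision to
// keep the range symmetric around zero.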
void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
                           float &nudged_min, float &nudged_max)
{
  LOGGER(l);

  assert(min <= max);

  const int32_t kMinScale = 0;
  const int32_t kMaxScale = 255;
  const double qmin_double = kMinScale;
  const double qmax_double = kMaxScale;
  const double rmin = std::fmin(0, min);
  const double rmax = std::fmax(0, max);

  double scale = (rmax - rmin) / (qmax_double - qmin_double);
  double zero_point_double = 0;
  uint8_t nudged_zero_point = 0;
  if (scale == 0)
  {
    WARN(l) << "The minimum and maximum values are the same." << std::endl;
    if (min >= 0 && max >= 0)
      zero_point_double = kMinScale;
    else
      zero_point_double = kMaxScale;
  }
  else
    zero_point_double = qmin_double - rmin / scale;
  if (min >= 0)
  {
    assert(min >= 0 && max >= 0);
    nudged_zero_point = kMinScale;
    scale = max / (qmax_double - qmin_double);
    if (min > 0 && max > 0)
      WARN(l) << "The minimum and maximum values are all positive." << std::endl;
  }
  else if (max < 0)
  {
    assert(min < 0 && max < 0);
    nudged_zero_point = kMaxScale;
    scale = -min / (qmax_double - qmin_double);
    WARN(l) << "The minimum and maximum values are all negative." << std::endl;
  }
  else
  {
    assert(min < 0 && max >= 0);
    nudged_zero_point = fp32_to_uint8_cast(std::round(zero_point_double));
  }

  // protect scale from being very low due to overflow
  if (scale < 1e-5)
  {
    scale = 1e-5;
    nudged_zero_point = fp32_to_uint8_cast(std::round(qmin_double - rmin / scale));
  }

  nudged_min = static_cast<float>((qmin_double - nudged_zero_point) * scale);
  nudged_max = static_cast<float>((qmax_double - nudged_zero_point) * scale);

  scaling_factor = scale;
  zp = nudged_zero_point;
}
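// Worked example (illustrative values, not from the original source):
// min = -1.0, max = 3.0 gives scale = (3 - (-1))/255 = 4/255 ~= 0.015686.
// Since min < 0 <= max, zero_point = round(0 - (-1.0)/0.015686) = round(63.75) = 64.
// The nudged range is then
//   nudged_min = (0 - 64)   * 4/255 ~= -1.00392
//   nudged_max = (255 - 64) * 4/255 ~=  2.99608
// i.e. the real range is stretched slightly so that 0.0 maps exactly to zp = 64.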
bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
                           int32_t &channel_dim_index)
{
  auto succs = loco::succs(node);

  // opcode is initialized to CIRCLEINPUT, because
  // CIRCLEINPUT should never be the successor of any node
  // (this is checked w/ the assert in the loop body)
  luci::CircleOpcode opcode = luci::CircleOpcode::CIRCLEINPUT;
  for (auto out : succs)
  {
    const auto circle_node = static_cast<CircleNode *>(out);
    assert(circle_node->opcode() != luci::CircleOpcode::CIRCLEINPUT);

    if (opcode == luci::CircleOpcode::CIRCLEINPUT)
    {
      opcode = circle_node->opcode();
    }
    else
    {
      // Node is used by multiple layers with different opcodes
      // We do not care about such cases
      if (opcode != circle_node->opcode())
        return false;
    }
  }

  for (auto out : succs)
  {
    auto conv = dynamic_cast<CircleConv2D *>(out);
    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
    auto tw_conv = dynamic_cast<CircleTransposeConv *>(out);
    auto fc = dynamic_cast<CircleFullyConnected *>(out);

    // Refer to https://github.com/Samsung/ONE/pull/2448.
    if ((conv != nullptr && conv->filter() == node) ||
        (tw_conv != nullptr && tw_conv->filter() == node)) // OHWI
    {
      assert(node->rank() == 4);
      dimension.dim(0).set(node->dim(0).value());
      dimension.dim(1).set(node->dim(1).value());
      dimension.dim(2).set(node->dim(2).value());
      dimension.dim(3).set(node->dim(3).value());
      channel_dim_index = 0; // Set channel_dim_index based on "O"
      return true;
    }
    else if (dw_conv != nullptr && dw_conv->filter() == node) // IHWC
    {
      assert(node->rank() == 4);
      dimension.dim(0).set(node->dim(0).value());
      dimension.dim(1).set(node->dim(1).value());
      dimension.dim(2).set(node->dim(2).value());
      dimension.dim(3).set(node->dim(3).value());
      channel_dim_index = 3; // Set channel_dim_index based on "C"
      return true;
    }
    else if (fc != nullptr && fc->weights() == node) // OI
    {
      assert(node->rank() == 2);
      dimension.dim(0).set(node->dim(0).value());
      dimension.dim(1).set(1); // Set FC layer like CONV
      dimension.dim(2).set(1);
      dimension.dim(3).set(node->dim(1).value());
      channel_dim_index = 0; // Set channel_dim_index based on "O"
      return true;
    }
    else
    {
      // node does not support channel-wise quantization
      return false;
    }
  }

  return false;
}
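// Illustrative cases (assumed shapes, not from the original source):
// - a CircleConv2D filter of shape [64, 3, 3, 32] (OHWI) yields
//   dimension = {64, 3, 3, 32} and channel_dim_index = 0;
// - a CircleDepthwiseConv2D filter [1, 3, 3, 64] (IHWC) yields
//   channel_dim_index = 3;
// - CircleFullyConnected weights [100, 512] (OI) are reshaped to
//   {100, 1, 1, 512} with channel_dim_index = 0, so FC is handled like CONV.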
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
{
  return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
           dimension.dim(3).value() +
         indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
         indices[2] * dimension.dim(3).value() + indices[3];
}
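// Worked example (illustrative values, not from the original source):
// for dimension = {2, 3, 4, 5} and indices = {1, 2, 3, 4}, the row-major
// offset is 1*(3*4*5) + 2*(4*5) + 3*5 + 4 = 60 + 40 + 15 + 4 = 119.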
// Activation (ofm) qtype is determined in different ways.
// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH)
// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL)
// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE,
// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details).
ActivationQType activation_qtype(const CircleNode *node)
{
  auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
  if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
    return ActivationQType::PreDefinedTanh;

#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT)         \
  {                                                         \
    auto n = loco::must_cast<const CLASS *>(node);          \
    auto input = loco::must_cast<CircleNode *>(n->INPUT()); \
    return activation_qtype(input);                         \
  }

  switch (node->opcode())
  {
    case CircleOpcode::LOGISTIC:
      return ActivationQType::PreDefinedLogistic;
    case CircleOpcode::TANH:
      return ActivationQType::PreDefinedTanh;
    case CircleOpcode::SOFTMAX:
      return ActivationQType::PreDefinedSoftmax;
    case CircleOpcode::FLOOR:
    case CircleOpcode::FLOOR_DIV:
    case CircleOpcode::FLOOR_MOD:
    case CircleOpcode::CEIL:
      return ActivationQType::IntScale;
    case CircleOpcode::GATHER:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params);
    case CircleOpcode::RESHAPE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor);
    case CircleOpcode::TRANSPOSE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a);
    case CircleOpcode::STRIDED_SLICE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input);
    case CircleOpcode::SPLIT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input);
    case CircleOpcode::CIRCLESPLITOUT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input);
    case CircleOpcode::SPLIT_V:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input);
    case CircleOpcode::CIRCLESPLITVOUT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input);
    case CircleOpcode::UNPACK:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value);
    case CircleOpcode::CIRCLEUNPACKOUT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input);
    case CircleOpcode::QUANTIZE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input);
    default:
      break;
  }

#undef RETURN_INPUT_ACTIVATION_QTYPE

  return ActivationQType::MinMax;
}
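// Example of the propagation rule (illustrative, not from the original source):
// for a TRANSPOSE whose input is a LOGISTIC output, the recursive call walks
// back through TRANSPOSE and returns PreDefinedLogistic; an op with no special
// rule (e.g. ADD without a TANH fused activation) falls through to MinMax.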
std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
                                                         loco::DataType dtype)
{
  auto qparam = std::make_unique<CircleQuantParam>();

  auto set_qparam = [&qparam](float scale, int64_t zp) {
    qparam->scale.emplace_back(scale);
    qparam->zerop.emplace_back(zp);
  };

  switch (qtype)
  {
    case ActivationQType::PreDefinedLogistic:
      if (dtype == loco::DataType::U8)
        set_qparam(1.0f / 256.0f, 0);
      else
      {
        assert(dtype == loco::DataType::S16);
        set_qparam(1.0f / 32768.0f, 0);
      }
      break;
    case ActivationQType::PreDefinedTanh:
      if (dtype == loco::DataType::U8)
        set_qparam(2.0f / 256.0f, 128);
      else
      {
        assert(dtype == loco::DataType::S16);
        set_qparam(1.0f / 32768.0f, 0);
      }
      break;
    case ActivationQType::PreDefinedSoftmax:
      if (dtype == loco::DataType::U8)
        set_qparam(1.0f / 255.0f, 0);
      else
      {
        assert(dtype == loco::DataType::S16);
        set_qparam(1.0f / 32767.0f, 0);
      }
      break;
    default:
      throw std::runtime_error("Unsupported opcode with pre-defined qparam");
  }
  return qparam;
}
// For nodes with integer output, we use integer scale
void set_int_scale(luci::CircleNode *node)
{
  assert(node); // FIX_CALLER_UNLESS

  auto qparam = node->quantparam();
  assert(qparam);                    // FIX_CALLER_UNLESS
  assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS

  auto fp_scale = qparam->scale[0];
  qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale);
}
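// Worked example (illustrative values, not from the original source):
// a scale of 0.4 is clamped up to 1.0, and 2.6 is rounded to 3.0, so every
// representable step of the output tensor stays an integer.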
void quant_const(luci::CircleConst *node, loco::DataType quant_type)
{
  assert(node->dtype() == loco::DataType::FLOAT32);

  float min = std::numeric_limits<float>::max();
  float max = std::numeric_limits<float>::lowest();
  for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
  {
    auto data = node->at<loco::DataType::FLOAT32>(i);
    min = data < min ? data : min;
    max = data > max ? data : max;
  }

  float scaling_factor{0.0};
  int64_t zp{0};
  float nudged_min{0.0};
  float nudged_max{0.0};

  switch (quant_type)
  {
    case loco::DataType::U8:
      asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
                                              nudged_max);
      break;
    case loco::DataType::S16:
      symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, nudged_min,
                                             nudged_max);
      break;
    default:
      throw std::runtime_error("Unsupported data type");
  }

  auto quantparam = std::make_unique<luci::CircleQuantParam>();
  quantparam->scale.push_back(scaling_factor);
  quantparam->zerop.push_back(zp);
  node->quantparam(std::move(quantparam));
}
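// Worked example (illustrative values, not from the original source):
// a FLOAT32 const holding {-1.0, 0.5, 3.0} quantized with quant_type = U8
// gets min = -1.0, max = 3.0, hence scale ~= 0.015686 and zp = 64 as above;
// the stored U8 values become {0, 96, 255} and the qparam is attached to node.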
namespace
{
// TODO move this to a more global helper file
int nbits(loco::DataType dt) noexcept
{
  switch (dt)
  {
    case loco::DataType::S8:
    case loco::DataType::U8:
      return 8;
    case loco::DataType::S16:
    case loco::DataType::U16:
    case loco::DataType::FLOAT16:
      return 16;
    case loco::DataType::S32:
    case loco::DataType::U32:
    case loco::DataType::FLOAT32:
      return 32;
    case loco::DataType::S64:
    default:
      return 64; // a safe large default
  }
}
// TODO Check if the metric is valid
// Returns true if [min,max] is poorly representable
bool range_check(float min, float max, loco::DataType dtype)
{
  float thresh = 1.5f;
  return log2f(max) - log2f(min) > nbits(dtype) * thresh;
}
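// Worked example (illustrative values; thresh above is a reconstructed
// constant): for U8 the budget is 8 * 1.5 = 12 octaves, so a range like
// [0.001, 100] spanning log2(100) - log2(0.001) ~= 16.6 octaves is flagged,
// while [0.5, 4.0] spanning 3 octaves passes.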
bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n)
{
  float min, max;
  // estimate the dequantized range from scale and zero point
  switch (n->dtype())
  {
    case loco::DataType::U8:
      min = scale * (0 - zp);
      max = scale * (255 - zp);
      break;
    case loco::DataType::S16:
      min = scale * (-32767);
      max = scale * (32767);
      break;
    default:
      return false;
  }
  return range_check(min, max, n->dtype());
}
} // namespace

void warn_accuracy_with_range(luci::CircleNode *n)
{
  LOGGER(l);
  auto qp = n->quantparam();
  auto k = qp->zerop.size();
  for (uint32_t i = 0; i < k; i++)
  {
    if (warn_scale_zp(qp->scale[i], qp->zerop[i], n))
      WARN(l) << "Quantization of " << i << "-th channel of " << n->name()
              << " may cause accuracy issues" << std::endl;
  }
}

} // namespace luci