/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "QuantizationUtils.h"

#include <luci/Log.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <memory>
#include <stdexcept>
#include <vector>

namespace luci
{
bool is_quantized(const CircleNode *node)
{
  return node->quantparam() != nullptr &&
         (node->dtype() == loco::DataType::U8 ||  // activation, weight (uint8 quant)
          node->dtype() == loco::DataType::S16 || // activation, weight (int16 quant)
          node->dtype() == loco::DataType::S32 || // bias (uint8 quant)
          node->dtype() == loco::DataType::S64);  // bias (int16 quant)
}
bool is_fp32(const CircleNode *node) { return node->dtype() == loco::DataType::FLOAT32; }
uint8_t fp32_to_uint8_cast(float f)
{
  assert(std::numeric_limits<uint8_t>::min() <= f);
  assert(f <= std::numeric_limits<uint8_t>::max());
  return static_cast<uint8_t>(f);
}
void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                             float &scaling_factor, int64_t &zp, float &nudged_min,
                                             float &nudged_max)
{
  const int32_t kMinScale = 0;
  const int32_t kMaxScale = 255;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
  const float scaling_factor_inv = 1.0 / scaling_factor;
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    // Clip to the nudged range, then quantize
    auto data = node->at<loco::DataType::FLOAT32>(i);
    data = data < nudged_min ? nudged_min : data;
    data = data > nudged_max ? nudged_max : data;
    quantized_values[i] =
      static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
  }

  node->dtype(loco::DataType::U8);      // change the type of tensor
  node->size<loco::DataType::U8>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
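
// Worked example (illustrative numbers, not from the original source): with
// min = -1.0f and max = 3.0f, compute_asym_scale_zp yields scale ≈ 0.015686,
// zp = 64, nudged_min ≈ -1.004, nudged_max ≈ 2.996 (see the worked example after
// that function), and a weight of 0.5f quantizes to
// round((0.5 - (-1.004)) / 0.015686) = 96.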
void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                            float &scaling_factor, int64_t &zp, float &nudged_min,
                                            float &nudged_max)
{
  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
  const int32_t kMinScale = -kMaxScale;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
  const float scaling_factor_inv = 1.0 / scaling_factor;
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    // Clip to the nudged range, then quantize
    auto data = node->at<loco::DataType::FLOAT32>(i);
    data = data < nudged_min ? nudged_min : data;
    data = data > nudged_max ? nudged_max : data;
    quantized_values[i] = static_cast<int32_t>(std::round(data * scaling_factor_inv));
  }

  node->dtype(loco::DataType::S16);      // change the type of tensor
  node->size<loco::DataType::S16>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::S16>(i) =
      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
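
// Worked example (illustrative numbers, not from the original source): with
// min = -2.0f and max = 1.0f, compute_sym_scale_zp picks the larger of
// |min| / 32767 and |max| / 32767, i.e. scale = 2 / 32767 ≈ 6.104e-5 with zp = 0,
// so a weight of 0.5f quantizes to round(0.5 / 6.104e-5) = 8192.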
void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
                          float &nudged_min, float &nudged_max)
{
  assert(min <= max);

  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
  const int32_t kMinScale = -kMaxScale;
  const double qmin_double = kMinScale;
  const double qmax_double = kMaxScale;
  const double rmin = std::fmin(0, min);
  const double rmax = std::fmax(0, max);
  double scale_factor_from_min_side{0};
  double scale_factor_from_max_side{0};

  if ((qmin_double * rmin) > 0)
    scale_factor_from_min_side = rmin / qmin_double;

  if ((qmax_double * rmax) > 0)
    scale_factor_from_max_side = rmax / qmax_double;

  scaling_factor = scale_factor_from_min_side > scale_factor_from_max_side
                     ? scale_factor_from_min_side
                     : scale_factor_from_max_side;

  // protect scale from being very low to avoid overflow/underflow
  if (scaling_factor < 1e-8)
    scaling_factor = 1e-8;

  zp = 0;
  nudged_min = static_cast<float>(qmin_double * scaling_factor);
  nudged_max = static_cast<float>(qmax_double * scaling_factor);
}
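
// Note (not from the original source): since kMinScale == -kMaxScale and zp is
// fixed at 0, the nudged range is always symmetric around zero. For example,
// min = -2.0f and max = 1.0f give scale = 2 / 32767 (the min side dominates)
// and a nudged range of [-2.0, 2.0].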
void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
                           float &nudged_min, float &nudged_max)
{
  LOGGER(l);

  assert(min <= max);
  const int32_t kMinScale = 0;
  const int32_t kMaxScale = 255;
  const double qmin_double = kMinScale;
  const double qmax_double = kMaxScale;
  const double rmin = std::fmin(0, min);
  const double rmax = std::fmax(0, max);

  double scale = (rmax - rmin) / (qmax_double - qmin_double);
  double zero_point_double = 0;
  uint8_t nudged_zero_point = 0;
  if (scale == 0)
  {
    WARN(l) << "The minimum and maximum values are the same." << std::endl;
    if (min >= 0 && max >= 0)
      zero_point_double = kMinScale;
    else
      zero_point_double = kMaxScale;
  }
  else
    zero_point_double = qmin_double - rmin / scale;
  if (min >= 0)
  {
    assert(min >= 0 && max >= 0);
    nudged_zero_point = kMinScale;
    scale = max / (qmax_double - qmin_double);
    if (min > 0 && max > 0)
      WARN(l) << "The minimum and maximum values are all positive." << std::endl;
  }
  else if (max < 0)
  {
    assert(min < 0 && max < 0);
    nudged_zero_point = kMaxScale;
    scale = -min / (qmax_double - qmin_double);
    WARN(l) << "The minimum and maximum values are all negative." << std::endl;
  }
  else
  {
    assert(min < 0 && max >= 0);
    nudged_zero_point = fp32_to_uint8_cast(std::round(zero_point_double));
  }

  // protect scale from being very low due to overflow
  if (scale < 1e-5)
  {
    scale = 1e-5;
    nudged_zero_point = fp32_to_uint8_cast(std::round(qmin_double - rmin / scale));
  }

  nudged_min = static_cast<float>((qmin_double - nudged_zero_point) * scale);
  nudged_max = static_cast<float>((qmax_double - nudged_zero_point) * scale);

  scaling_factor = scale;
  zp = nudged_zero_point;
}
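
// Worked example (illustrative numbers, not from the original source):
// min = -1.0f, max = 3.0f spans zero, so scale = (3 - (-1)) / 255 ≈ 0.015686 and
// zero_point_double = 0 - (-1) / 0.015686 = 63.75, nudged to 64. The representable
// range becomes [(0 - 64) * scale, (255 - 64) * scale] ≈ [-1.004, 2.996].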
bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
                           int32_t &channel_dim_index)
{
  auto succs = loco::succs(node);

  // opcode is initialized to CIRCLEINPUT, because
  // CIRCLEINPUT should never be the successor of any node
  // (this is checked w/ the assert in the loop body)
  luci::CircleOpcode opcode = luci::CircleOpcode::CIRCLEINPUT;
  for (auto out : succs)
  {
    const auto circle_node = static_cast<CircleNode *>(out);
    assert(circle_node->opcode() != luci::CircleOpcode::CIRCLEINPUT);

    if (opcode == luci::CircleOpcode::CIRCLEINPUT)
    {
      opcode = circle_node->opcode();
    }
    else
    {
      // Node is used by multiple layers with different opcodes
      // We do not care such cases
      if (opcode != circle_node->opcode())
        return false;
    }
  }

  for (auto out : succs)
  {
    auto conv = dynamic_cast<CircleConv2D *>(out);
    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
    auto tw_conv = dynamic_cast<CircleTransposeConv *>(out);
    auto fc = dynamic_cast<CircleFullyConnected *>(out);

    // Refer to https://github.com/Samsung/ONE/pull/2448.
    if ((conv != nullptr && conv->filter() == node) ||
        (tw_conv != nullptr && tw_conv->filter() == node)) // OHWI
    {
      assert(node->rank() == 4);
      dimension.dim(0).set(node->dim(0).value());
      dimension.dim(1).set(node->dim(1).value());
      dimension.dim(2).set(node->dim(2).value());
      dimension.dim(3).set(node->dim(3).value());
      channel_dim_index = 0; // Set channel_dim_index based on "O"
      return true;
    }
    else if (dw_conv != nullptr && dw_conv->filter() == node) // IHWC
    {
      assert(node->rank() == 4);
      dimension.dim(0).set(node->dim(0).value());
      dimension.dim(1).set(node->dim(1).value());
      dimension.dim(2).set(node->dim(2).value());
      dimension.dim(3).set(node->dim(3).value());
      channel_dim_index = 3; // Set channel_dim_index based on "C"
      return true;
    }
    else if (fc != nullptr && fc->weights() == node) // OI
    {
      assert(node->rank() == 2);
      dimension.dim(0).set(node->dim(0).value());
      dimension.dim(1).set(1); // Set FC layer like CONV
      dimension.dim(2).set(1);
      dimension.dim(3).set(node->dim(1).value());
      channel_dim_index = 0; // Set channel_dim_index based on "O"
      return true;
    }
    else
    {
      // node does not support channel-wise quantization
      return false;
    }
  }

  return false;
}
uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
{
  return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
           dimension.dim(3).value() +
         indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
         indices[2] * dimension.dim(3).value() + indices[3];
}
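
// Example (illustrative, not from the original source): for a 4D shape
// {O, H, W, I} = {32, 3, 3, 16} and indices {o, h, w, i}, the flat offset is
// o * 3 * 3 * 16 + h * 3 * 16 + w * 16 + i, i.e. standard row-major layout.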
// Activation (ofm) qtype is determined in different ways.
// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH)
// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL)
// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE,
// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details).
ActivationQType activation_qtype(const CircleNode *node)
{
  auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
  if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
    return ActivationQType::PreDefinedTanh;

#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT)         \
  {                                                         \
    auto n = loco::must_cast<const CLASS *>(node);          \
    auto input = loco::must_cast<CircleNode *>(n->INPUT()); \
    return activation_qtype(input);                         \
  }

  switch (node->opcode())
  {
    case CircleOpcode::LOGISTIC:
      return ActivationQType::PreDefinedLogistic;
    case CircleOpcode::TANH:
      return ActivationQType::PreDefinedTanh;
    case CircleOpcode::SOFTMAX:
      return ActivationQType::PreDefinedSoftmax;
    case CircleOpcode::FLOOR:
    case CircleOpcode::FLOOR_DIV:
    case CircleOpcode::FLOOR_MOD:
    case CircleOpcode::CEIL:
      return ActivationQType::IntScale;
    case CircleOpcode::GATHER:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params);
    case CircleOpcode::RESHAPE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor);
    case CircleOpcode::TRANSPOSE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a);
    case CircleOpcode::STRIDED_SLICE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input);
    case CircleOpcode::SPLIT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input);
    case CircleOpcode::CIRCLESPLITOUT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input);
    case CircleOpcode::SPLIT_V:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input);
    case CircleOpcode::CIRCLESPLITVOUT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input);
    case CircleOpcode::UNPACK:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value);
    case CircleOpcode::CIRCLEUNPACKOUT:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input);
    case CircleOpcode::QUANTIZE:
      RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input);
    default:
      break;
  }
#undef RETURN_INPUT_ACTIVATION_QTYPE

  return ActivationQType::MinMax;
}
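
// Note (not from the original source): category 3 recurses through the
// propagating Op, so e.g. a TANH output reached through a RESHAPE still
// resolves to PreDefinedTanh via the RESHAPE's input.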
std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
                                                         loco::DataType dtype)
{
  auto qparam = std::make_unique<CircleQuantParam>();

  auto set_qparam = [&qparam](float scale, int64_t zp) {
    qparam->scale.emplace_back(scale);
    qparam->zerop.emplace_back(zp);
  };

  switch (qtype)
  {
    case ActivationQType::PreDefinedLogistic:
      if (dtype == loco::DataType::U8)
        set_qparam(1.0f / 256.0f, 0);
      else
      {
        assert(dtype == loco::DataType::S16);
        set_qparam(1.0f / 32768.0f, 0);
      }
      break;
    case ActivationQType::PreDefinedTanh:
      if (dtype == loco::DataType::U8)
        set_qparam(2.0f / 256.0f, 128);
      else
      {
        assert(dtype == loco::DataType::S16);
        set_qparam(1.0f / 32768.0f, 0);
      }
      break;
    case ActivationQType::PreDefinedSoftmax:
      if (dtype == loco::DataType::U8)
        set_qparam(1.0f / 255.0f, 0);
      else
      {
        assert(dtype == loco::DataType::S16);
        set_qparam(1.0f / 32767.0f, 0);
      }
      break;
    default:
      throw std::runtime_error("Unsupported opcode with pre-defined qparam");
  }
  return qparam;
}
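
// Rationale (a brief note, not from the original source): LOGISTIC outputs lie
// in (0, 1) and TANH in (-1, 1), so their output ranges are known a priori and
// the scale/zero-point can be fixed to cover them exactly, e.g. uint8 logistic
// maps (0, 1) onto [0, 255] with scale 1/256 and zero-point 0.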
// For nodes with integer output, we use integer scale
void set_int_scale(luci::CircleNode *node)
{
  assert(node); // FIX_CALLER_UNLESS

  auto qparam = node->quantparam();
  assert(qparam);                    // FIX_CALLER_UNLESS
  assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS

  auto fp_scale = qparam->scale[0];
  qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale);
}
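
// Example (illustrative, not from the original source): a scale of 0.3 becomes
// 1.0 and a scale of 2.4 becomes 2.0. With an integer scale (and integer zero
// point), every dequantized value scale * (q - zp) is itself an integer, as
// required for Ops like FLOOR and CEIL.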
void quant_const(luci::CircleConst *node, loco::DataType quant_type)
{
  assert(node->dtype() == loco::DataType::FLOAT32);

  float min = std::numeric_limits<float>::max();
  float max = std::numeric_limits<float>::lowest();
  for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
  {
    auto data = node->at<loco::DataType::FLOAT32>(i);
    min = data < min ? data : min;
    max = data > max ? data : max;
  }

  float scaling_factor{0.0};
  int64_t zp{0};
  float nudged_min{0.0};
  float nudged_max{0.0};

  switch (quant_type)
  {
    case loco::DataType::U8:
      asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
                                              nudged_max);
      break;
    case loco::DataType::S16:
      symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
                                             nudged_max);
      break;
    default:
      throw std::runtime_error("Unsupported data type");
  }

  auto quantparam = std::make_unique<luci::CircleQuantParam>();
  quantparam->scale.push_back(scaling_factor);
  quantparam->zerop.push_back(zp);
  node->quantparam(std::move(quantparam));
}
namespace
{
// TODO move this to a more global helper file
int nbits(loco::DataType dt) noexcept
{
  switch (dt)
  {
    case loco::DataType::S8:
    case loco::DataType::U8:
      return 8;
    case loco::DataType::S16:
    case loco::DataType::U16:
    case loco::DataType::FLOAT16:
      return 16;
    case loco::DataType::S32:
    case loco::DataType::U32:
    case loco::DataType::FLOAT32:
      return 32;
    case loco::DataType::S64:
      return 64;
    default:
      return 64; // a safe large default
  }
}
// TODO Check if the metric is valid
// Returns true if [min,max] is poorly representable
bool range_check(float min, float max, loco::DataType dtype)
{
  float thresh = 1.5f;
  return log2f(max) - log2f(min) > nbits(dtype) * thresh;
}
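
// Example (illustrative, not from the original source): for U8 (8 bits) the
// threshold is 8 * 1.5 = 12, so a range like [1e-3, 1e2] with
// log2(1e2) - log2(1e-3) ≈ 16.6 > 12 is flagged as poorly representable.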
bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n)
{
  float min, max;
  // estimate min/max from the quantized value range of the dtype
  switch (n->dtype())
  {
    case loco::DataType::U8:
      min = scale * (0 - zp);
      max = scale * (255 - zp);
      break;
    case loco::DataType::S16:
      min = scale * (-32767);
      max = scale * (32767);
      break;
    default:
      return false;
  }
  return range_check(min, max, n->dtype());
}

} // namespace
void warn_accuracy_with_range(luci::CircleNode *n)
{
  LOGGER(l);
  auto qp = n->quantparam();
  auto k = qp->zerop.size();
  for (uint32_t i = 0; i < k; i++)
  {
    if (warn_scale_zp(qp->scale[i], qp->zerop[i], n))
      WARN(l) << "Quantization of the " << i << "-th channel of " << n->name()
              << " may cause accuracy issues" << std::endl;
  }
}

} // namespace luci