2 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "QuantizeWeights.h"
18 #include "QuantizationUtils.h"
20 #include <luci/Service/Nodes/CircleConst.h>
33 using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
35 void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
37 loco::TensorShape dimension;
39 uint32_t indices[4] = {
43 if (!get_channel_dim_index(node, dimension, channel_dim_index))
49 for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
51 for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
53 for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
55 for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
57 func(indices, dimension, channel_dim_index);
64 void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
65 std::vector<float> &scaling_factor, int32_t &channel_dim_index)
67 assert(node->dtype() == loco::DataType::FLOAT32);
69 const int32_t kMinScale = 0;
70 const int32_t kMaxScale = 255;
72 uint32_t size = node->size<loco::DataType::FLOAT32>();
73 std::vector<int32_t> quantized_values(size);
75 auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
76 int channel_idx = indices[channel_dim_index];
77 const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
78 auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
79 quantized_values[cal_offset(dimension, indices)] =
80 static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
83 iterate_per_channel(node, channel_dim_index, quantize);
85 node->dtype(loco::DataType::U8); // change the type of tensor
86 node->size<loco::DataType::U8>(size); // resize tensor
87 for (uint32_t i = 0; i < size; ++i)
89 node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
93 // TODO Reduce duplicate code with QuantizeDequantizeWeights
94 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
95 std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
96 std::vector<float> &nudged_min, std::vector<float> &nudged_max,
97 int32_t &channel_dim_index)
99 assert(node->dtype() == loco::DataType::FLOAT32);
100 const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
101 const int32_t kMinScale = -kMaxScale;
103 uint32_t size = node->size<loco::DataType::FLOAT32>();
104 std::vector<int32_t> quantized_values(size);
106 for (size_t i = 0; i < min.size(); ++i)
108 compute_sym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
111 auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
112 int channel_idx = indices[channel_dim_index];
113 const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
114 auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
115 data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
116 data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
117 quantized_values[cal_offset(dimension, indices)] =
118 static_cast<int32_t>(std::round(data * scaling_factor_inv));
121 iterate_per_channel(node, channel_dim_index, quantize);
123 node->dtype(loco::DataType::S16); // change the type of tensor
124 node->size<loco::DataType::S16>(size); // resize tensor
125 for (uint32_t i = 0; i < size; ++i)
127 node->at<loco::DataType::S16>(i) =
128 std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
132 void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
133 int32_t &channel_dim_index)
135 loco::TensorShape dimension;
138 if (!get_channel_dim_index(node, dimension, channel_dim_index))
140 throw std::runtime_error("Failed to find channel index in " + node->name());
142 auto size = dimension.dim(channel_dim_index).value();
144 std::vector<bool> has_min_max_value(size, false);
148 auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
149 int channel_idx = indices[channel_dim_index];
150 auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
151 if (has_min_max_value[channel_idx])
153 min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
154 max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
158 min[channel_idx] = data;
159 max[channel_idx] = data;
160 has_min_max_value[channel_idx] = true;
164 iterate_per_channel(node, channel_dim_index, cal_minmax);
167 void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
168 std::vector<float> &max, std::vector<float> &scaling_factor,
169 std::vector<int64_t> &zp, std::vector<float> &nudged_min,
170 std::vector<float> &nudged_max, int32_t &channel_dim_index)
172 assert(node->dtype() == loco::DataType::FLOAT32);
174 const int32_t kMinScale = 0;
175 const int32_t kMaxScale = 255;
177 uint32_t size = node->size<loco::DataType::FLOAT32>();
178 std::vector<int32_t> quantized_values(size);
180 for (size_t i = 0; i < min.size(); ++i)
182 compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
185 auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
186 int channel_idx = indices[channel_dim_index];
187 const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
188 auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
189 data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
190 data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
191 quantized_values[cal_offset(dimension, indices)] =
192 static_cast<int32_t>(std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
195 iterate_per_channel(node, channel_dim_index, quantize);
197 node->dtype(loco::DataType::U8); // change the type of tensor
198 node->size<loco::DataType::U8>(size); // resize tensor
199 for (uint32_t i = 0; i < size; ++i)
201 node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
205 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
206 int32_t &channel_dim_index)
208 assert(node->dtype() == loco::DataType::FLOAT32);
210 const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
211 const int32_t kMinScale = -kMaxScale;
213 uint32_t size = node->size<loco::DataType::FLOAT32>();
214 std::vector<int32_t> quantized_values(size);
216 auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
217 int channel_idx = indices[channel_dim_index];
218 const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
219 auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
220 quantized_values[cal_offset(dimension, indices)] =
221 static_cast<int32_t>(std::round(data * scaling_factor_inv));
224 iterate_per_channel(node, channel_dim_index, quantize);
226 node->dtype(loco::DataType::S16); // change the type of tensor
227 node->size<loco::DataType::S16>(size); // resize tensor
228 for (uint32_t i = 0; i < size; ++i)
230 node->at<loco::DataType::S16>(i) =
231 std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
235 void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
237 const int32_t kMinScale = 0;
238 const int32_t kMaxScale = 255;
240 uint32_t size = node->size<loco::DataType::FLOAT32>();
242 const float scaling_factor_inv = 1.0 / scaling_factor;
243 std::vector<int32_t> quantized_values(size);
244 for (uint32_t i = 0; i < size; ++i)
246 auto data = node->at<loco::DataType::FLOAT32>(i);
247 quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
250 node->dtype(loco::DataType::U8); // change the type of tensor
251 node->size<loco::DataType::U8>(size); // resize tensor
252 for (uint32_t i = 0; i < size; ++i)
254 node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
258 // Quantize const per channel
260 // The last dimension of const is the same as the dimension of channel
261 // And the rest of the const dimensions should be 1
262 // So, a 'single value' is quantized per channel
264 // Quantization spec (f: fp value, q: quantized value)
267 // Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
268 // Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
271 // Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
272 // Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
273 void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
275 assert(node->dtype() == loco::DataType::FLOAT32);
276 assert(node->rank() > 0);
278 for (uint32_t i = 0; i < node->rank() - 1; i++)
280 // Caller should call this function when the below condition is satisfied
281 if (node->dim(i).value() != 1)
282 throw std::runtime_error("Non-channel dimension of const node must be 1");
285 uint32_t size = node->size<loco::DataType::FLOAT32>();
286 assert(size == node->dim(node->rank() - 1).value());
288 auto quantparam = std::make_unique<CircleQuantParam>();
289 quantparam->quantized_dimension = node->rank() - 1;
290 std::vector<int32_t> quantized_data(size);
292 for (uint32_t i = 0; i < size; ++i)
294 auto data = node->at<loco::DataType::FLOAT32>(i);
295 if (quant_type == loco::DataType::U8)
299 quantparam->scale.push_back(data);
300 quantparam->zerop.push_back(0);
301 quantized_data[i] = 1;
305 quantparam->scale.push_back(-data);
306 quantparam->zerop.push_back(1);
307 quantized_data[i] = 0;
310 else if (quant_type == loco::DataType::S16)
314 quantparam->scale.push_back(data);
315 quantized_data[i] = 1;
319 quantparam->scale.push_back(-data);
320 quantized_data[i] = -1;
322 quantparam->zerop.push_back(0);
325 node->quantparam(std::move(quantparam));
329 case loco::DataType::U8:
330 node->dtype(loco::DataType::U8);
331 node->size<loco::DataType::U8>(size);
332 for (uint32_t i = 0; i < size; ++i)
334 assert(quantized_data[i] == 0 || quantized_data[i] == 1);
335 node->at<loco::DataType::U8>(i) = quantized_data[i];
338 case loco::DataType::S16:
339 node->dtype(loco::DataType::S16);
340 node->size<loco::DataType::S16>(size);
341 for (uint32_t i = 0; i < size; ++i)
343 assert(quantized_data[i] == -1 || quantized_data[i] == 1);
344 node->at<loco::DataType::S16>(i) = quantized_data[i];
348 throw std::runtime_error("Unsupported data type");
357 void QuantizeWeights::quantize_weights(luci::CircleConst *weights)
359 // Find min/max per channel-wise
360 if (granularity == QuantizationGranularity::ChannelWise)
362 auto quantparam = weights->quantparam();
363 if (quantparam == nullptr)
365 // Find min/max on the fly
366 // NOTE This is for the case when QuantizeDequantizeWeights is skipped
367 // TODO Reduce duplicate codes
368 std::vector<float> min;
369 std::vector<float> max;
370 int32_t channel_dim_index = 0;
372 cal_minmax_per_channel(weights, min, max, channel_dim_index);
374 std::vector<float> nudged_min(min.size());
375 std::vector<float> nudged_max(min.size());
376 std::vector<float> scaling_factor(min.size());
377 std::vector<int64_t> zp(min.size());
379 if (output_type == loco::DataType::U8)
381 asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
386 sym_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
390 auto quantparam = std::make_unique<CircleQuantParam>();
391 quantparam->scale = scaling_factor;
392 quantparam->zerop = zp;
393 quantparam->quantized_dimension = channel_dim_index;
394 weights->quantparam(std::move(quantparam));
399 auto min = quantparam->min;
400 auto scaling_factor = quantparam->scale;
401 int32_t channel_dim_index = 0;
403 if (output_type == loco::DataType::U8)
405 asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
409 sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
411 quantparam->min.clear();
412 quantparam->max.clear();
413 quantparam->quantized_dimension = channel_dim_index;
415 // Find min/max per layer-wise
418 auto quantparam = weights->quantparam();
419 if (quantparam == nullptr)
421 // Find min/max on the fly
422 // NOTE This is for the case when QuantizeDequantizeWeights is skipped
423 // TODO Reduce duplicate codes
424 float min = std::numeric_limits<float>::max();
425 float max = std::numeric_limits<float>::lowest();
426 for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
428 auto data = weights->at<loco::DataType::FLOAT32>(i);
429 min = data < min ? data : min;
430 max = data > max ? data : max;
432 float scaling_factor{0};
437 asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
439 auto quantparam = std::make_unique<CircleQuantParam>();
440 quantparam->scale.push_back(scaling_factor);
441 quantparam->zerop.push_back(zp);
442 weights->quantparam(std::move(quantparam));
446 // Quantize using recorded quantparam
447 assert(quantparam != nullptr);
448 assert(quantparam->min.size() == 1); // only support layer-wise quant
449 assert(quantparam->scale.size() == 1); // only support layer-wise quant
450 auto min = quantparam->min[0];
451 auto scaling_factor = quantparam->scale[0];
452 asym_wquant_per_layer(weights, min, scaling_factor);
453 quantparam->min.clear();
454 quantparam->max.clear();
457 void QuantizeWeights::visit(luci::CircleConv2D *node)
460 INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
462 auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
463 if (!is_quantized(weights))
465 auto new_weights = luci::clone(weights);
466 node->filter(new_weights);
467 quantize_weights(new_weights);
471 void QuantizeWeights::visit(luci::CircleDepthwiseConv2D *node)
474 INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
476 auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
477 if (!is_quantized(weights))
479 auto new_weights = luci::clone(weights);
480 node->filter(new_weights);
481 quantize_weights(new_weights);
485 void QuantizeWeights::visit(luci::CircleInstanceNorm *node)
488 INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
490 auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
491 auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
493 if (!is_quantized(gamma))
495 assert(gamma->dtype() == loco::DataType::FLOAT32);
496 auto new_gamma = luci::clone(gamma);
497 if (granularity == QuantizationGranularity::LayerWise)
498 quant_const(new_gamma, output_type);
499 else if (granularity == QuantizationGranularity::ChannelWise)
500 quant_const_per_channel(new_gamma, output_type);
501 node->gamma(new_gamma);
503 if (!is_quantized(beta))
505 assert(beta->dtype() == loco::DataType::FLOAT32);
506 auto new_beta = luci::clone(beta);
507 if (granularity == QuantizationGranularity::LayerWise)
508 quant_const(new_beta, output_type);
509 else if (granularity == QuantizationGranularity::ChannelWise)
510 quant_const_per_channel(new_beta, output_type);
511 node->beta(new_beta);
515 void QuantizeWeights::visit(luci::CirclePRelu *node)
518 INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
520 auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
522 if (!is_quantized(alpha))
524 assert(alpha->dtype() == loco::DataType::FLOAT32);
525 auto new_alpha = luci::clone(alpha);
526 if (granularity == QuantizationGranularity::LayerWise)
527 quant_const(new_alpha, output_type);
528 else if (granularity == QuantizationGranularity::ChannelWise)
529 quant_const_per_channel(new_alpha, output_type);
530 node->alpha(new_alpha);
534 void QuantizeWeights::visit(luci::CircleTransposeConv *node)
537 INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
539 auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
540 if (!is_quantized(weights))
542 auto new_weights = luci::clone(weights);
543 node->filter(new_weights);
544 quantize_weights(new_weights);
548 void QuantizeWeights::visit(luci::CircleFullyConnected *node)
551 INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
553 auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
554 if (!is_quantized(weights))
556 auto new_weights = luci::clone(weights);
557 node->weights(new_weights);
558 quantize_weights(new_weights);
// Fallback: node kinds without quantizable constant inputs are left untouched.
void QuantizeWeights::visit(luci::CircleNode *) {}