/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "luci/Pass/QuantizeWithMinMaxPass.h"
18 #include "QuantizationUtils.h"
20 #include <luci/IR/CircleNodes.h>
21 #include <luci/IR/CircleNodeVisitor.h>
24 #include <oops/UserExn.h>
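// High-level flow of this pass (see QuantizeWithMinMaxPass::run at the bottom of this file):
//   1. quantize activations using recorded min/max
//   2. quantize weights
//   3. quantize biases
//   4. quantize const inputs other than weights/bias
//   5. propagate quantization parameters through concatenation
//   6. update the dtype of graph outputs
// The helpers below implement the per-tensor arithmetic used by these steps.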
void overwrite_quantparam(luci::CircleConcatenation *concat, luci::CircleNode *target)
{
  auto concat_qparam = concat->quantparam();
  if (concat_qparam == nullptr)
    throw std::runtime_error("quantparam of concat is not found during overwrite");

  auto target_qparam = target->quantparam();
  if (target_qparam == nullptr)
  {
    auto quantparam = std::make_unique<CircleQuantParam>();
    target->quantparam(std::move(quantparam));
    target_qparam = target->quantparam();
  }
  target_qparam->min = concat_qparam->min;
  target_qparam->max = concat_qparam->max;
  target_qparam->scale = concat_qparam->scale;
  target_qparam->zerop = concat_qparam->zerop;
  target_qparam->quantized_dimension = concat_qparam->quantized_dimension;
}
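// quant_const_values applies the affine mapping
//   q = clamp(round(x / scaling_factor) + zerop)
// where the clamping range is [0, 255] for U8 and the symmetric range
// [-32767, 32767] for S16 (the S16 scheme is symmetric, so zerop is expected to be 0).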
void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
                        loco::DataType quant_type)
{
  uint32_t size = const_node->size<loco::DataType::FLOAT32>();

  const float scaling_factor_inv = 1.0 / scaling_factor;
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    auto data = const_node->at<loco::DataType::FLOAT32>(i);
    quantized_values[i] = static_cast<int32_t>(std::round(data * scaling_factor_inv) + zerop);
  }

  switch (quant_type)
  {
    case loco::DataType::U8:
      const_node->dtype(loco::DataType::U8);      // change the type of tensor
      const_node->size<loco::DataType::U8>(size); // resize tensor
      for (uint32_t i = 0; i < size; ++i)
        const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
      break;
    case loco::DataType::S16:
      assert(zerop == 0);
      const_node->dtype(loco::DataType::S16);      // change the type of tensor
      const_node->size<loco::DataType::S16>(size); // resize tensor
      for (uint32_t i = 0; i < size; ++i)
        const_node->at<loco::DataType::S16>(i) =
            std::min(32767, std::max(-32767, quantized_values[i]));
      break;
    default:
      throw std::runtime_error("Unsupported data type");
  }
}
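// quant_const quantizes a constant that has no recorded profile data: min/max are taken
// from the constant's own values, and a per-layer scale/zero-point is computed
// (asymmetric for U8, symmetric for S16) by the helpers from QuantizationUtils.h.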
void quant_const(CircleConst *node, loco::DataType quant_type)
{
  assert(node->dtype() == loco::DataType::FLOAT32);

  float min = std::numeric_limits<float>::max();
  float max = std::numeric_limits<float>::lowest();
  for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
  {
    auto data = node->at<loco::DataType::FLOAT32>(i);
    min = data < min ? data : min;
    max = data > max ? data : max;
  }

  float scaling_factor{0.0};
  int64_t zp{0};
  float nudged_min{0.0};
  float nudged_max{0.0};

  switch (quant_type)
  {
    case loco::DataType::U8:
      asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
                                              nudged_max);
      break;
    case loco::DataType::S16:
      symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
                                             nudged_max);
      break;
    default:
      throw std::runtime_error("Unsupported data type");
  }

  auto quantparam = std::make_unique<CircleQuantParam>();
  quantparam->scale.push_back(scaling_factor);
  quantparam->zerop.push_back(zp);
  node->quantparam(std::move(quantparam));
}
// Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer
// If true, return <input, weight> pair of the successor node (used to quantize bias)
// If false, return <nullptr, nullptr>
std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
{
  auto circle_const = dynamic_cast<CircleConst *>(node);
  if (circle_const == nullptr)
    return std::make_pair(nullptr, nullptr);

  auto succs = loco::succs(node);
  if (succs.size() != 1) // assume bias is used by only one node
    return std::make_pair(nullptr, nullptr);

  for (auto out : succs)
  {
    auto conv = dynamic_cast<CircleConv2D *>(out);
    if (conv != nullptr && conv->bias() == circle_const)
    {
      assert(conv->input() != nullptr);
      assert(conv->filter() != nullptr);
      return std::make_pair(conv->input(), conv->filter());
    }
    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
    if (dw_conv != nullptr && dw_conv->bias() == circle_const)
    {
      assert(dw_conv->input() != nullptr);
      assert(dw_conv->filter() != nullptr);
      return std::make_pair(dw_conv->input(), dw_conv->filter());
    }
    auto fc = dynamic_cast<CircleFullyConnected *>(out);
    if (fc != nullptr && fc->bias() == circle_const)
    {
      assert(fc->input() != nullptr);
      assert(fc->weights() != nullptr);
      return std::make_pair(fc->input(), fc->weights());
    }
    auto tconv = dynamic_cast<CircleTransposeConv *>(out);
    if (tconv != nullptr && tconv->bias() == circle_const)
    {
      assert(tconv->outBackprop() != nullptr);
      assert(tconv->filter() != nullptr);
      return std::make_pair(tconv->outBackprop(), tconv->filter());
    }
  }
  return std::make_pair(nullptr, nullptr);
}
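// Bias quantization: since a quantized bias is added to the (input x weight) accumulator,
// its scale is fixed to input_scale * weight_scale and its zero-point is 0.
// The U8 scheme stores biases as S32; the S16 scheme stores them as S64.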
void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
                               float *scaling_factor, int64_t *zp)
{
  float scale = input_scale * weight_scale;
  const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    quantized_values[i] =
        static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
  }

  node->dtype(loco::DataType::S32);      // change the type of tensor
  node->size<loco::DataType::S32>(size); // resize tensor
  const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
  const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::S32>(i) =
        std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
  *scaling_factor = scale;
  *zp = 0;
}
void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
                            std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
{
  float scaling_factor_inv{0};

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  std::vector<int32_t> quantized_values(size);

  for (uint32_t i = 0; i < size; ++i)
  {
    scaling_factor[i] = input_scale * weight_scale[i];
    scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
    quantized_values[i] =
        static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
    zp[i] = 0;
  }

  node->dtype(loco::DataType::S32);      // change the type of tensor
  node->size<loco::DataType::S32>(size); // resize tensor
  const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
  const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::S32>(i) =
        std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
void int16_quant_bias_per_channel(CircleConst *node, float input_scale,
                                  std::vector<float> &weight_scale,
                                  std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
{
  float scaling_factor_inv{0};

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  std::vector<int64_t> quantized_values(size);

  for (uint32_t i = 0; i < size; ++i)
  {
    scaling_factor[i] = input_scale * weight_scale[i];
    scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
    quantized_values[i] =
        static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
    zp[i] = 0;
  }

  node->dtype(loco::DataType::S64);      // change the type of tensor
  node->size<loco::DataType::S64>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::S64>(i) = quantized_values[i];
  }
}
bool has_min_max(const CircleNode *node)
{
  return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
}
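// Per-channel weight quantization: the weight is traversed as a 4-D tensor and each
// element is quantized with the scale (and min, for the asymmetric case) of its channel.
// The channel index is provided by get_channel_dim_index() and is later recorded in
// quantparam->quantized_dimension by QuantizeWeights.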
void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
                            int32_t &channel_dim_index)
{
  assert(node->dtype() == loco::DataType::FLOAT32);

  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
  const int32_t kMinScale = -kMaxScale;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  std::vector<int32_t> quantized_values(size);

  loco::TensorShape dimension;
  dimension.rank(4);
  uint32_t indices[4] = {
      0,
  };

  if (!get_channel_dim_index(node, dimension, channel_dim_index))
  {
    assert(false);
    return;
  }

  for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
  {
    for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
    {
      for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
      {
        for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
        {
          int channel_idx = indices[channel_dim_index];
          const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
          auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
          quantized_values[cal_offset(dimension, indices)] =
              static_cast<int32_t>(std::round(data * scaling_factor_inv));
        }
      }
    }
  }

  node->dtype(loco::DataType::S16);      // change the type of tensor
  node->size<loco::DataType::S16>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::S16>(i) =
        std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
                             std::vector<float> &scaling_factor, int32_t &channel_dim_index)
{
  assert(node->dtype() == loco::DataType::FLOAT32);

  const int32_t kMinScale = 0;
  const int32_t kMaxScale = 255;

  uint32_t size = node->size<loco::DataType::FLOAT32>();
  std::vector<int32_t> quantized_values(size);

  loco::TensorShape dimension;
  dimension.rank(4);
  uint32_t indices[4] = {
      0,
  };

  if (!get_channel_dim_index(node, dimension, channel_dim_index))
  {
    assert(false);
    return;
  }

  for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
  {
    for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
    {
      for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
      {
        for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
        {
          int channel_idx = indices[channel_dim_index];
          const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
          auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
          quantized_values[cal_offset(dimension, indices)] =
              static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
        }
      }
    }
  }

  node->dtype(loco::DataType::U8);      // change the type of tensor
  node->size<loco::DataType::U8>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
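// Per-layer asymmetric weight quantization for U8: q = round((x - min) / scaling_factor),
// clamped to [0, 255]. A single min/scale pair is used for the whole tensor.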
void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
{
  const int32_t kMinScale = 0;
  const int32_t kMaxScale = 255;

  uint32_t size = node->size<loco::DataType::FLOAT32>();

  const float scaling_factor_inv = 1.0 / scaling_factor;
  std::vector<int32_t> quantized_values(size);
  for (uint32_t i = 0; i < size; ++i)
  {
    auto data = node->at<loco::DataType::FLOAT32>(i);
    quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
  }

  node->dtype(loco::DataType::U8);      // change the type of tensor
  node->size<loco::DataType::U8>(size); // resize tensor
  for (uint32_t i = 0; i < size; ++i)
  {
    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  }
}
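// The three visitors below are applied to every active node by run(), in the order
// QuantizeActivation -> QuantizeWeights -> QuantizeBias, so activation and weight scales
// are already available when the bias is quantized.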
/**
 * @brief QuantizeActivation quantizes tensors for activations
 * @details Quantize using recorded min/max values
 */
struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
{
  QuantizeActivation(loco::DataType input, loco::DataType output)
      : input_type(input), output_type(output)
  {
  }

  loco::DataType input_type;
  loco::DataType output_type;

  // Quantize input tensors of each node
  bool visit(luci::CircleNode *node)
  {
    LOGGER(l);
    INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      auto input_node = node->arg(i);
      auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);

      // Check if this is already quantized
      if (is_quantized(circle_node))
        continue;

      // Check if this is bias (bias is quantized later)
      auto iw = get_input_weight_of_bias(circle_node);
      if (iw.first != nullptr && iw.second != nullptr)
        continue;

      // Check if this is activation
      // We assume min/max are recorded only for activations
      if (has_min_max(circle_node) && !is_weights(circle_node))
      {
        // Quantize using recorded min/max
        auto quantparam = circle_node->quantparam();
        assert(quantparam->min.size() == 1); // only support layer-wise quant
        assert(quantparam->max.size() == 1); // only support layer-wise quant
        auto min = quantparam->min[0];
        auto max = quantparam->max[0];

        float scaling_factor{0};
        int64_t zp{0};
        float nudged_min{0};
        float nudged_max{0};

        if (output_type == loco::DataType::U8)
        {
          compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
          circle_node->dtype(loco::DataType::U8);
        }
        else
        {
          compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
          circle_node->dtype(loco::DataType::S16);
        }

        circle_node->quantparam()->min.clear();
        circle_node->quantparam()->max.clear();
        circle_node->quantparam()->scale.push_back(scaling_factor);
        circle_node->quantparam()->zerop.push_back(zp);
      }
    }
    return false;
  }
};
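/**
 * @brief QuantizeBias quantizes tensors for bias
 * @details Derive the bias scale from the scales of its input and weight
 */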
struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
{
  QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
      : input_type(input), output_type(output), granularity(gr)
  {
  }

  loco::DataType input_type;
  loco::DataType output_type;
  QuantizationGranularity granularity;

  // Quantize bias node
  bool visit(luci::CircleNode *node)
  {
    // Check if this is already quantized
    if (is_quantized(node))
      return false;

    // Check if this is bias
    auto iw = get_input_weight_of_bias(node);
    if (iw.first == nullptr || iw.second == nullptr)
      return false;

    auto input = loco::must_cast<luci::CircleNode *>(iw.first);
    auto weight = loco::must_cast<luci::CircleNode *>(iw.second);

    if (granularity == QuantizationGranularity::ChannelWise)
    {
      assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
      auto input_scale = input->quantparam()->scale[0];

      assert(weight->quantparam() != nullptr); // weight scale's channel-wise
      auto weight_scale = weight->quantparam()->scale;

      auto circle_const = loco::must_cast<luci::CircleConst *>(node);

      uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
      assert(size == weight_scale.size());
      std::vector<float> scaling_factor(size);
      std::vector<int64_t> zp(size);

      if (output_type == loco::DataType::U8)
      {
        quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
      }
      else if (output_type == loco::DataType::S16)
      {
        int16_quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
      }
      else
      {
        throw std::runtime_error("Unsupported quantization type.");
      }

      auto quantparam = std::make_unique<CircleQuantParam>();
      quantparam->scale = scaling_factor;
      quantparam->zerop = zp;
      assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
      circle_const->quantparam(std::move(quantparam));
    }
    else
    {
      assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
      auto input_scale = input->quantparam()->scale[0];

      assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
      auto weight_scale = weight->quantparam()->scale[0];

      auto circle_const = loco::must_cast<luci::CircleConst *>(node);
      float scaling_factor{0};
      int64_t zp{0};
      asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
      auto quantparam = std::make_unique<CircleQuantParam>();
      quantparam->scale.push_back(scaling_factor);
      quantparam->zerop.push_back(zp);
      assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
      circle_const->quantparam(std::move(quantparam));
    }
    return false;
  }
};
/**
 * @brief QuantizeWeights quantizes tensors for weights
 * @details Find min/max values on the fly and then quantize
 */
struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
{
  QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
      : input_type(input), output_type(output), granularity(gr)
  {
  }

  loco::DataType input_type;
  loco::DataType output_type;
  QuantizationGranularity granularity;

  // Quantize input tensors of each node
  bool visit(luci::CircleNode *node)
  {
    LOGGER(l);
    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      auto input_node = node->arg(i);
      auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);

      // Check if this is already quantized
      if (is_quantized(circle_node))
        continue;

      if (is_weights(circle_node))
      {
        auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);

        // Find min/max per channel-wise
        if (granularity == QuantizationGranularity::ChannelWise)
        {
          auto quantparam = circle_node->quantparam();
          if (quantparam == nullptr)
          {
            assert(false && "quantparam is nullptr");
            return false;
          }

          auto min = quantparam->min;
          auto scaling_factor = quantparam->scale;
          int32_t channel_dim_index = 0;

          if (output_type == loco::DataType::U8)
          {
            asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
          }
          else
          {
            sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
          }
          quantparam->min.clear();
          quantparam->max.clear();
          quantparam->quantized_dimension = channel_dim_index;
        }
        // Find min/max per layer-wise
        else
        {
          // Quantize using recorded quantparam
          auto quantparam = circle_node->quantparam();
          assert(quantparam != nullptr);
          assert(quantparam->min.size() == 1);   // only support layer-wise quant
          assert(quantparam->scale.size() == 1); // only support layer-wise quant
          auto min = quantparam->min[0];
          auto scaling_factor = quantparam->scale[0];
          asym_wquant_per_layer(circle_const, min, scaling_factor);
          quantparam->min.clear();
          quantparam->max.clear();
        }
      }
    }
    return false;
  }
};
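// Note: QuantizeWeights expects the weight's quantparam (min/scale, per channel or per
// layer) to be populated before it runs; it only converts the data to U8/S16, records
// the quantized dimension, and clears the no-longer-needed min/max.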
/**
 * @brief Quantize const input tensors using min/max of const values
 */
void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
{
  auto opcode = node->opcode();
  auto arity = node->arity();

  loco::Node *input_node{nullptr};
  luci::CircleConst *const_node{nullptr};

  switch (opcode)
  {
    case luci::CircleOpcode::CONV_2D:
    case luci::CircleOpcode::DEPTHWISE_CONV_2D:
    case luci::CircleOpcode::FULLY_CONNECTED:
    case luci::CircleOpcode::TRANSPOSE_CONV:
      // Handled in QuantizeWeights and QuantizeBias
      break;

    case luci::CircleOpcode::CONCATENATION:
      // Handled in propagate_concat_quantparam
      break;

    case luci::CircleOpcode::ARG_MAX:
    case luci::CircleOpcode::ARG_MIN:
    case luci::CircleOpcode::MEAN:
    case luci::CircleOpcode::PAD:
    case luci::CircleOpcode::REDUCE_ANY:
    case luci::CircleOpcode::REDUCE_PROD:
    case luci::CircleOpcode::REDUCE_MAX:
    case luci::CircleOpcode::REDUCE_MIN:
    case luci::CircleOpcode::RESHAPE:
    case luci::CircleOpcode::RESIZE_BILINEAR:
    case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR:
    case luci::CircleOpcode::REVERSE_SEQUENCE:
    case luci::CircleOpcode::SUM:
    case luci::CircleOpcode::TILE:
    case luci::CircleOpcode::TOPK_V2:
    case luci::CircleOpcode::TRANSPOSE:
      // The second input of these Ops should not be quantized
      // Ex: axis, paddings
      input_node = node->arg(0);
      const_node = dynamic_cast<luci::CircleConst *>(input_node);
      if (const_node != nullptr)
        quant_const(const_node, output_type);
      break;

    case luci::CircleOpcode::ADD:
    case luci::CircleOpcode::ADD_N:
    case luci::CircleOpcode::DIV:
    case luci::CircleOpcode::EQUAL:
    case luci::CircleOpcode::GREATER:
    case luci::CircleOpcode::GREATER_EQUAL:
    case luci::CircleOpcode::INSTANCE_NORM:
    case luci::CircleOpcode::LESS:
    case luci::CircleOpcode::LESS_EQUAL:
    case luci::CircleOpcode::MAXIMUM:
    case luci::CircleOpcode::MINIMUM:
    case luci::CircleOpcode::MUL:
    case luci::CircleOpcode::NOT_EQUAL:
    case luci::CircleOpcode::PRELU:
    case luci::CircleOpcode::SUB:
      // Quantize all const inputs using their values
      for (uint32_t i = 0; i < arity; i++)
      {
        input_node = node->arg(i);
        const_node = dynamic_cast<luci::CircleConst *>(input_node);
        if (const_node != nullptr)
          quant_const(const_node, output_type);
      }
      break;

    default:
      for (uint32_t i = 0; i < arity; i++)
      {
        input_node = node->arg(i);
        const_node = dynamic_cast<luci::CircleConst *>(input_node);
        if (const_node != nullptr)
          throw std::runtime_error("Unsupported Op for const inputs");
      }
      break;
  }
}
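// For example, PAD quantizes only its first input (the paddings constant is left as-is),
// while binary Ops such as ADD or MUL quantize every constant input from its own values.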
/** BEFORE
 *
 *        [CircleNode]       [CircleConst]
 *        (U8 qparam1)           (FP32)
 *                 \               /
 *                  \             /
 *                [CircleConcatenation]
 *                    (U8 qparam2)
 *
 *  AFTER
 *        [CircleNode]       [CircleConst]
 *        (U8 qparam2)       (U8 qparam2)
 *                 \               /
 *                  \             /
 *                [CircleConcatenation]
 *                    (U8 qparam2)
 */
void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type)
{
  assert(concat->quantparam() != nullptr);

  const auto num_inputs = concat->numValues();

  // Quantize const inputs using their values if concat has fused act function
  if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
  {
    for (uint32_t i = 0; i < num_inputs; i++)
    {
      auto node = concat->arg(i);
      auto const_node = dynamic_cast<luci::CircleConst *>(node);
      if (const_node != nullptr)
        quant_const(const_node, quant_type);
    }
    return;
  }

  for (uint32_t i = 0; i < num_inputs; i++)
  {
    auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));

    // Skip if this input is CONCAT Op
    if (node->opcode() == luci::CircleOpcode::CONCATENATION)
      continue;

    // Skip if this input is used by other Ops
    auto succs = loco::succs(node);
    if (succs.size() != 1)
    {
      if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
      {
        luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
        quant_const(const_node, quant_type);
      }
      continue;
    }

    assert(succs.find(concat) != succs.end());

    // Quantize constant values
    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
    {
      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
      if (const_node->dtype() != loco::DataType::FLOAT32)
        throw std::runtime_error("Unsupported data type for constant input of concatenation Op");

      const auto concat_qparam = concat->quantparam();
      if (concat_qparam == nullptr)
        throw std::runtime_error("quantparam of concat is not found during propagation");

      assert(concat_qparam->scale.size() == 1);
      const auto scaling_factor = concat_qparam->scale[0];
      const auto zerop = concat_qparam->zerop[0];

      quant_const_values(const_node, scaling_factor, zerop, quant_type);
    }
    else
    {
      // Non-const input must have been quantized
      assert(node->quantparam() != nullptr);
    }

    overwrite_quantparam(concat, node);
  }
}
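// Propagating the concat's quantparam to its inputs means all inputs and the output share
// one scale/zero-point, so the concatenation itself needs no requantization at runtime.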
} // namespace

bool QuantizeWithMinMaxPass::run(loco::Graph *g)
{
  LOGGER(l);
  INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;

  // Quantize activation
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    QuantizeActivation qa(_input_dtype, _output_dtype);
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    circle_node->accept(&qa);
  }

  // Quantize weights
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    circle_node->accept(&qw);
  }

  // Quantize bias
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    circle_node->accept(&qb);
  }

  // Quantize const inputs other than weights and bias
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    quantize_const_inputs(circle_node, _output_dtype);
  }

  // Propagate quantization parameters of concat Op
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto concat = dynamic_cast<luci::CircleConcatenation *>(node);
    if (not concat)
      continue;

    // Propagate qparam of concat to its inputs if
    // (1) concat is uint8-quantized
    // (2) concat has no fused activation function
    // (3) the input is not a concatenation Op
    // (4) the input is not used by Ops other than concat
    propagate_concat_quantparam(concat, _output_dtype);
  }

  // Update output dtype
  auto graph_outputs = g->outputs();
  for (auto node : loco::output_nodes(g))
  {
    auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
    {
      circle_node->dtype(_output_dtype);
      auto graph_output = graph_outputs->at(circle_node->index());
      graph_output->dtype(_output_dtype);
    }
  }

  INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
  return false; // one time run
}

} // namespace luci