/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/PropagateQParamForwardPass.h"
#include "luci/Pass/PropagateQParamBackwardPass.h"
#include "luci/Pass/RemoveRedundantQuantizePass.h"
#include "QuantizeActivation.h"
#include "QuantizeWeights.h"
#include "QuantizeBias.h"
#include "QuantizationUtils.h"
#include "ProgressReporter.h"
#include "helpers/LayerInfoMap.h"

#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>

#include <logo/Phase.h>

namespace luci
{

namespace
{

bool use_predefined_values(ActivationQType qtype)
{
  switch (qtype)
  {
    case ActivationQType::PreDefinedLogistic:
    case ActivationQType::PreDefinedTanh:
    case ActivationQType::PreDefinedSoftmax:
      return true;
    default:
      // This ensures this switch-statement handles all ActivationQTypes
      assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax);
      return false;
  }
}

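// NOTE "Pre-defined" means the output qparam is fixed by the Op's known output range
// rather than derived from recorded min/max. For example, for U8, Tanh output
// conventionally uses scale = 2/256 and zero-point = 128 so that [-1, 1] is covered
// exactly (see make_predefined_qparam for the actual values used here).
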
// Create a Quantize Op whose
// dtype is out_type
// shape is the same with node
// qparam is computed according to node's qtype
luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
{
  auto quantize = node->graph()->nodes()->create<CircleQuantize>();
  quantize->name(node->name() + "_Quantize");
  quantize->dtype(out_type);
  quantize->rank(node->rank());
  for (uint32_t i = 0; i < node->rank(); i++)
    quantize->dim(i).set(node->dim(i).value());

  quantize->shape_status(luci::ShapeStatus::VALID);

  auto qparam = node->quantparam();
  assert(qparam); // FIX_CALLER_UNLESS

  auto qtype = luci::activation_qtype(node);
  if (use_predefined_values(qtype))
  {
    quantize->quantparam(luci::make_predefined_qparam(qtype, out_type));
    return quantize;
  }

  assert(qtype == ActivationQType::MinMax or qtype == ActivationQType::IntScale);

  assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS
  assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS
  auto min = qparam->min[0];
  auto max = qparam->max[0];

  float scaling_factor{0};
  int64_t zp{0};
  float nudged_min{0};
  float nudged_max{0};

  if (out_type == loco::DataType::U8)
  {
    compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
  }
  else
  {
    assert(out_type == loco::DataType::S16);
    compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
  }

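  // A rough sketch of what the helpers above compute (the exact nudging logic
  // lives in QuantizationUtils):
  //   U8  (asymmetric): scale ~= (max - min) / 255, zerop ~= round(-min / scale)
  //   S16 (symmetric) : scale ~= max(|min|, |max|) / 32767, zerop = 0
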
  auto quantparam = std::make_unique<CircleQuantParam>();
  quantparam->scale.push_back(scaling_factor);
  quantparam->zerop.push_back(zp);
  // Save original min/max (not nudged_min/max). Nudged min/max
  // is different from the real min/max values, causing wrong
  // qparam when quantization dtype is changed.
  quantparam->min.push_back(min);
  quantparam->max.push_back(max);

  quantize->quantparam(std::move(quantparam));

  if (qtype == ActivationQType::IntScale)
    set_int_scale(quantize);

  return quantize;
}

// Create Dequantize Op whose shape is the same with node
luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
{
  auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
  dequantize->name(node->name() + "_Dequantize");
  dequantize->dtype(loco::DataType::FLOAT32);
  dequantize->rank(node->rank());
  for (uint32_t i = 0; i < node->rank(); i++)
    dequantize->dim(i).set(node->dim(i).value());

  dequantize->shape_status(luci::ShapeStatus::VALID);

  luci::add_origin(dequantize, luci::get_origin(node));

  return dequantize;
}

/**
 * Insert Quantize operator for mixed-precision quantization
 * 1. Before input feature map (only for non-const)
 * 2. After output feature map
 *
 * For example, if default_dtype = U8 and op_dtype = S16,
 * 1. Quantize (U8->S16) is inserted before ifm
 * 2. Quantize (S16->U8) is inserted after ofm
 *
 * Why not insert Quantize Op for const ifm?
 * We quantize const tensors in a single step to preserve precision.
 * For example, if default_dtype = U8, op_dtype = S16, and op is CONV2D,
 * we directly quantize the weights to 16 bits, rather than 8 bits and then 16 bits.
 */

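// Roughly, for default_dtype = U8 and op_dtype = S16 around a CONV2D:
//
//   ifm (U8) -> [Quantize U8->S16] -> CONV2D (S16) -> [Quantize S16->U8] -> next Op (U8)
//
// while CONV2D's const weights and bias are quantized to 16 bits directly.
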
struct InsertQuantizeOp final : public luci::CircleNodeMutableVisitor<void>
{
  InsertQuantizeOp(loco::DataType default_dtype, loco::DataType op_dtype)
    : _default_dtype(default_dtype), _op_dtype(op_dtype)
  {
    assert(default_dtype != op_dtype); // FIX_CALLER_UNLESS
  }

private:
  loco::DataType _default_dtype;
  loco::DataType _op_dtype;

  luci::CircleQuantize *create_in_quantize(loco::Node *in, loco::Node *origin)
  {
    auto input = loco::must_cast<luci::CircleNode *>(in);
    if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
      return nullptr;

    // input is not quantizable (ex: index)
    if (input->quantparam() == nullptr)
      return nullptr;

    auto input_quant = create_quantize_op(input, _op_dtype);
    input_quant->input(input);
    auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
    luci::add_origin(input_quant, luci::get_origin(origin_node));
    return input_quant;
  }

  void insert_out_quantize(loco::Node *node)
  {
    auto output = loco::must_cast<luci::CircleNode *>(node);
    assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS

    // output is not quantizable (ex: index)
    if (output->quantparam() == nullptr)
      return;

    auto output_quant = create_quantize_op(output, _default_dtype);

    luci::add_origin(output_quant, luci::get_origin(output));
    loco::replace(node).with(output_quant);
    output_quant->input(node);
  }

// INPUT_NAME is the only activation of NODE
#define INSERT_QUANTIZE_TO_UNARY_OP(NODE, INPUT_NAME)                      \
  void visit(NODE *node)                                                   \
  {                                                                        \
    if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node))   \
      node->INPUT_NAME(input_quant);                                       \
                                                                           \
    insert_out_quantize(node);                                             \
  }

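// For reference, INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x) expands to roughly:
//
//   void visit(luci::CircleAbs *node)
//   {
//     if (auto input_quant = create_in_quantize(node->x(), node))
//       node->x(input_quant);
//
//     insert_out_quantize(node);
//   }
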
// INPUT_NAME is the only activation of NODE
#define INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(NODE, INPUT_NAME, OUT_NAME) \
  void visit(NODE *node)                                                     \
  {                                                                          \
    if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node))     \
      node->INPUT_NAME(input_quant);                                         \
                                                                             \
    auto out_nodes = loco::succs(node);                                      \
    for (auto out_node : out_nodes)                                          \
    {                                                                        \
      auto out_circle = loco::must_cast<OUT_NAME *>(out_node);               \
      insert_out_quantize(out_circle);                                       \
    }                                                                        \
  }

// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
#define INSERT_QUANTIZE_TO_BINARY_OP(NODE, INPUT_NAME1, INPUT_NAME2)         \
  void visit(NODE *node)                                                     \
  {                                                                          \
    if (auto input1_quant = create_in_quantize(node->INPUT_NAME1(), node))   \
      node->INPUT_NAME1(input1_quant);                                       \
                                                                             \
    if (auto input2_quant = create_in_quantize(node->INPUT_NAME2(), node))   \
      node->INPUT_NAME2(input2_quant);                                       \
                                                                             \
    insert_out_quantize(node);                                               \
  }

  // Default behavior (NYI)
  void visit(luci::CircleNode *node)
  {
    throw std::runtime_error("Unsupported Op for mixed-precision quantization. Layer name: " +
                             node->name());
  }

  void visit(luci::CircleOutput *) {}
  void visit(luci::CircleSplitVOut *) {}
  void visit(luci::CircleSplitOut *) {}
  void visit(luci::CircleTopKV2Out *) {}
  void visit(luci::CircleUniqueOut *) {}
  void visit(luci::CircleUnpackOut *) {}

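  // NOTE The *Out nodes above are the virtual per-output nodes of multi-output Ops.
  // Quantize Ops for them are inserted when their parent Op (Split, TopKV2, ...) is
  // visited via INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP, so they are no-ops here.
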
  // Ops that receive a single activation as an input
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthToSpace, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthwiseConv2D, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleElu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleExp, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFloor, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGelu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceProd, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReverseSequence, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRsqrt, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSlice, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTile, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTranspose, a)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTransposeConv, outBackprop)

  // Ops that receive two activations as inputs
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleAdd, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleBatchMatMul, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleDiv, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleFloorDiv, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMaximum, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMinimum, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMul, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleOneHot, on_value, off_value)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CirclePow, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleSub, x, y)

  // Multiple-output ops that receive a single activation as input
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplit, input, luci::CircleSplitOut)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplitV, input, luci::CircleSplitVOut)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleTopKV2, input, luci::CircleTopKV2Out)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnique, input, luci::CircleUniqueOut)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnpack, value, luci::CircleUnpackOut)

  // AddN has arbitrary number of inputs
  void visit(luci::CircleAddN *node)
  {
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      if (auto input_quant = create_in_quantize(node->inputs(i), node))
        node->inputs(i, input_quant);
    }

    insert_out_quantize(node);
  }

  // Concat has arbitrary number of inputs
  void visit(luci::CircleConcatenation *node)
  {
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      if (auto input_quant = create_in_quantize(node->values(i), node))
        node->values(i, input_quant);
    }

    insert_out_quantize(node);
  }

  // Pack has arbitrary number of inputs
  void visit(luci::CirclePack *node)
  {
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      if (auto input_quant = create_in_quantize(node->values(i), node))
        node->values(i, input_quant);
    }

    insert_out_quantize(node);
  }

#undef INSERT_QUANTIZE_TO_UNARY_OP
#undef INSERT_QUANTIZE_TO_BINARY_OP
#undef INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP
};

} // namespace

void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
{
  auto inputs = g->inputs();

  assert(inputs); // FIX_CALLER_UNLESS
  assert(inputs->size() == _ctx->input_types.size()); // FIX_CALLER_UNLESS

  // NOTE loco::input_nodes returns input nodes following the order of InputIndex
  auto input_nodes = loco::input_nodes(g);
  for (uint32_t i = 0; i < input_nodes.size(); i++)
  {
    auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
    assert(i == input->index()); // Fix input_type logic

    const auto user_given_dtype = _ctx->input_types[i];

    if (input->dtype() == user_given_dtype)
      continue;

    // Bool type is not quantizable
    if (input->dtype() == loco::DataType::BOOL)
      continue;
    if (input->dtype() == loco::DataType::S32)
      continue;
    if (input->dtype() == loco::DataType::S64)
      continue;

    // Insert Quantize Op
    auto quant_op = create_quantize_op(input, input->dtype());
    loco::replace(input).with(quant_op);
    quant_op->input(input);
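    // After this rewrite (and the dtype updates below), the input path reads:
    //   CircleInput (user_given_dtype) -> Quantize -> original consumers
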
    // TODO Set a proper origin (Quantize should have its own Origin)
    {
      auto succs = loco::succs(quant_op);
      assert(succs.size() > 0);
      auto succ = loco::must_cast<luci::CircleNode *>(*succs.begin());
      luci::add_origin(quant_op, luci::get_origin(succ));
    }

    // Update qparam of input
    // This step is skipped if input_type is float32
    if (user_given_dtype != loco::DataType::FLOAT32)
    {
      auto quantparam = input->quantparam();
      assert(quantparam);
      assert(quantparam->min.size() == 1); // only support layer-wise quant
      assert(quantparam->max.size() == 1); // only support layer-wise quant
      auto min = quantparam->min[0];
      auto max = quantparam->max[0];

      float scaling_factor{0};
      int64_t zp{0};
      float nudged_min{0};
      float nudged_max{0};

      if (user_given_dtype == loco::DataType::U8)
      {
        compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
      }
      else
      {
        assert(user_given_dtype == loco::DataType::S16);
        compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
      }
      input->quantparam()->scale[0] = scaling_factor;
      input->quantparam()->zerop[0] = zp;
    }

    // Update dtype of input
    input->dtype(user_given_dtype);

    auto graph_input = inputs->at(input->index());
    graph_input->dtype(user_given_dtype);
  }
}

void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
{
  auto outputs = g->outputs();
  assert(outputs); // FIX_CALLER_UNLESS
  assert(outputs->size() == _ctx->output_types.size()); // Fix CircleQuantizer unless

  // NOTE loco::output_nodes returns output nodes following the order of OutputIndex
  auto output_nodes = loco::output_nodes(g);
  for (uint32_t i = 0; i < output_nodes.size(); i++)
  {
    auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
    assert(i == output->index()); // Fix output_type logic

    const auto user_given_dtype = _ctx->output_types[i];

    if (output->dtype() == user_given_dtype)
      continue;

    // Bool type is not quantizable
    if (output->dtype() == loco::DataType::BOOL)
      continue;

    auto from = loco::must_cast<luci::CircleNode *>(output->from());

    // The last Op is not quantizable (ex: ArgMax)
    if (not from->quantparam())
      continue;

    // Insert Dequantize Op for float32 output_type
    if (user_given_dtype == loco::DataType::FLOAT32)
    {
      auto dequant_op = create_dequantize(from);
      dequant_op->input(from);
      output->from(dequant_op);
    }
    else
    {
      // Insert Quantize Op for non-float32 output_type
      auto quant_op = create_quantize_op(from, user_given_dtype);
      quant_op->input(from);
      output->from(quant_op);

      // TODO Set a proper origin (Quantize should have its own Origin)
      luci::add_origin(quant_op, luci::get_origin(from));
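      // The output path now reads:
      //   from (quantized dtype) -> Quantize (user_given_dtype) -> CircleOutput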
    }

    // Update dtype of output
    output->dtype(user_given_dtype);

    auto graph_output = outputs->at(output->index());
    graph_output->dtype(user_given_dtype);
  }
}

/**
 * How does QuantizeWithMinMax work?
 *
 * We categorize tensors into four groups
 * - Activation: Feature maps (both Const/Non-const)
 * - Weights: Const tensors of specific Ops (Conv, FC, ...)
 * - Bias: Const tensors of specific Ops (Conv, FC, ...)
 * - Others: padding value, one_hot value, axis, ..
 *
 * Activation is quantized in different ways
 * 1. For non-constant activation, quantize using recorded min/max
 * 2. For constant activation, quantize using min/max of its value
 * 3. For some Ops (ex: pad_v2), output qparam is used as input qparam (backward propagation)
 * 4. For some Ops (ex: reshape), input qparam is used as output qparam (forward propagation)
 * 5. For some Ops (ex: tanh), output qparam has pre-defined values
 *
 * Weights are quantized using the min/max of their values
 *
 * Bias is quantized using input scale (s_i) and weights scale (s_w)
 * - Therefore, activation and weights should be quantized earlier than bias
 *
 * Overall Quantization Steps
 * 1. Quantize Activation
 *   - Quantize using recorded min/max (QuantizeActivation)
 *   - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
 *   - Remove redundant Quantize Ops (RemoveRedundantQuantizePass)
 *   - Propagate qparam backward (PropagateQParamBackwardPass)
 *   - Quantize const inputs (QuantizeConstInputActivation)
 *   - Quantize using pre-defined values (QuantizeSpecialActivation)
 *   - Propagate qparam forward (PropagateQParamForwardPass)
 * 2. Quantize Weights
 * 3. Quantize Bias
 * 4. Set input dtype
 * 5. Set output dtype
 *
 * Why was the quantization sequence determined as above?
 * - Activation and weights should be quantized before bias (1->2->3). Input/Output
 *   dtype can be updated at the end (4->5).
 * - During activation quantization,
 *   - Backward propagation is performed earlier than forward propagation. This allows
 *     backward-propagated qparam to be overwritten during forward propagation.
 *     We made this decision as Ops for forward propagation (reshape, transpose, ..)
 *     are more common than Ops for backward propagation. TODO Check this decision is safe.
 *   - QuantizeSpecialActivation is called before forward propagation to make sure that
 *     the pre-defined qparam values are propagated.
 */

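// A minimal usage sketch (field names follow the Context struct declared in
// QuantizeWithMinMaxPass.h, and a unique_ptr<Context> constructor is assumed;
// the values below are only an illustration):
//
//   auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
//   ctx->input_model_dtype = loco::DataType::FLOAT32;
//   ctx->output_model_dtype = loco::DataType::U8;
//   ctx->granularity = luci::QuantizationGranularity::ChannelWise;
//   ctx->input_types = {loco::DataType::FLOAT32};
//   ctx->output_types = {loco::DataType::FLOAT32};
//
//   luci::QuantizeWithMinMaxPass pass(std::move(ctx));
//   pass.run(g); // g: loco::Graph * whose activations carry recorded min/max
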
bool QuantizeWithMinMaxPass::run(loco::Graph *g)
{
  LOGGER(l);
  INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;

  auto info_by_name = layer_info_map(g, _ctx->layers_info);

  auto quantize_dtype = [&](const luci::CircleNode *node) {
    auto iter = info_by_name.find(node->name());

    // Return designated quantization dtype
    if (iter != info_by_name.end())
      return iter->second.dtype;

    // Return default quantization dtype
    return _ctx->output_model_dtype;
  };

  auto quantize_granularity = [&](const luci::CircleNode *node) {
    auto iter = info_by_name.find(node->name());

    // Return designated quantization granularity
    if (iter != info_by_name.end())
      return iter->second.granularity;

    // Return default quantization granularity
    return _ctx->granularity;
  };

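  // NOTE A layer listed in _ctx->layers_info overrides the global
  // output_model_dtype/granularity for that layer only; every other layer falls
  // back to the defaults returned above.
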
  // Quantize activation
  // Models can have inactive (unused) inputs.
  // We do not reject such models, but quantize them too
  for (auto node : loco::all_nodes(g))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
    circle_node->accept(&qa);
  }

  // Insert Quantize Op
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    auto op_dtype = quantize_dtype(circle_node);
    if (op_dtype != _ctx->output_model_dtype)
    {
      InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype);
      circle_node->accept(&iqo);
    }
  }

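  // NOTE InsertQuantizeOp may leave back-to-back Quantize Ops (e.g., when two
  // adjacent layers both run at a non-default dtype); the phase below removes them.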
  // Remove redundant Quantize Op
  {
    logo::Phase phase;

    phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());

    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.attach(&prog);
    phase_runner.run(phase);
  }

  // Backward propagation of activation qparam
  {
    PropagateQParamBackwardPass pqbp(_ctx->output_model_dtype);
    pqbp.run(g);
  }

  // Quantize const input activation
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeConstInputActivation qcia(quantize_dtype(circle_node));
    circle_node->accept(&qcia);
  }

  // Update qparam of output of special Ops
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);

    // At this point, all activations have to be quantized.
    // Un-quantized nodes are not the quantization target (ex: int32 tensor),
    // so they are skipped here.
    if (circle_node->quantparam() == nullptr)
      continue;

    QuantizeSpecialActivation qsa(_ctx->input_model_dtype, quantize_dtype(circle_node));
    circle_node->accept(&qsa);
  }

  // Forward propagation of activation qparam
  {
    logo::Phase phase;
    phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool));

    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.attach(&prog);
    phase_runner.run(phase);
  }

  // Quantize weights
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
                       quantize_granularity(circle_node));
    circle_node->accept(&qw);
  }

  // Quantize bias
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node),
                    quantize_granularity(circle_node));
    circle_node->accept(&qb);
  }

  // Update output dtype
  auto graph_outputs = g->outputs();
  for (auto node : loco::output_nodes(g))
  {
    auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _ctx->output_model_dtype)
    {
      circle_node->dtype(_ctx->output_model_dtype);
      auto graph_output = graph_outputs->at(circle_node->index());
      graph_output->dtype(_ctx->output_model_dtype);
    }
  }

  // Set input type
  set_input_type(g);

  // Set output type
  set_output_type(g);

  // Remove redundant Quantize Op
  {
    logo::Phase phase;

    phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());

    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.attach(&prog);
    phase_runner.run(phase);
  }

  // Remove min/max values
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    if (auto qparam = circle_node->quantparam())
    {
      warn_accuracy_with_range(circle_node);
      qparam->min.clear();
      qparam->max.clear();
    }
  }

  INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
  return false; // one time run
}

} // namespace luci