/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/PropagateQParamForwardPass.h"
#include "luci/Pass/PropagateQParamBackwardPass.h"
#include "luci/Pass/RemoveRedundantQuantizePass.h"
#include "QuantizeActivation.h"
#include "QuantizeWeights.h"
#include "QuantizeBias.h"
#include "QuantizationUtils.h"
#include "ProgressReporter.h"
#include "helpers/LayerInfoMap.h"

#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>

#include <logo/Phase.h>

namespace luci
{

namespace
{

bool use_predefined_values(ActivationQType qtype)
{
  switch (qtype)
  {
    case ActivationQType::PreDefinedLogistic:
    case ActivationQType::PreDefinedTanh:
    case ActivationQType::PreDefinedSoftmax:
      return true;
    default:
      // This ensures this switch-statement handles all ActivationQTypes
      assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax);
      return false;
  }
}

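// NOTE "Pre-defined" means the output qparam is fixed by the Op's known output range
// rather than derived from recorded min/max. For example, for U8, Tanh output
// conventionally uses scale = 2/256 and zero-point = 128 so that [-1, 1] is covered
// exactly (see make_predefined_qparam for the actual values used here).
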
// Create a Quantize Op whose
// dtype is out_type
// shape is the same with node
// qparam is computed according to node's qtype
luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
{
  auto quantize = node->graph()->nodes()->create<CircleQuantize>();
  quantize->name(node->name() + "_Quantize");
  quantize->dtype(out_type);
  quantize->rank(node->rank());
  for (uint32_t i = 0; i < node->rank(); i++)
    quantize->dim(i).set(node->dim(i).value());

  quantize->shape_status(luci::ShapeStatus::VALID);

  auto qparam = node->quantparam();
  assert(qparam); // FIX_CALLER_UNLESS

  auto qtype = luci::activation_qtype(node);
  if (use_predefined_values(qtype))
  {
    quantize->quantparam(luci::make_predefined_qparam(qtype, out_type));
    return quantize;
  }

  assert(qtype == ActivationQType::MinMax or qtype == ActivationQType::IntScale);

  assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS
  assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS
  auto min = qparam->min[0];
  auto max = qparam->max[0];

  float scaling_factor{0};
  int64_t zp{0};
  float nudged_min{0};
  float nudged_max{0};

  if (out_type == loco::DataType::U8)
  {
    compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
  }
  else
  {
    assert(out_type == loco::DataType::S16);
    compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
  }

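  // A rough sketch of what the helpers above compute (the exact nudging logic
  // lives in QuantizationUtils):
  //   U8  (asymmetric): scale ~= (max - min) / 255, zerop ~= round(-min / scale)
  //   S16 (symmetric) : scale ~= max(|min|, |max|) / 32767, zerop = 0
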
  auto quantparam = std::make_unique<CircleQuantParam>();
  quantparam->scale.push_back(scaling_factor);
  quantparam->zerop.push_back(zp);
  // Save original min/max (not nudged_min/max). Nudged min/max
  // is different from the real min/max values, causing wrong
  // qparam when quantization dtype is changed.
  quantparam->min.push_back(min);
  quantparam->max.push_back(max);

  quantize->quantparam(std::move(quantparam));

  if (qtype == ActivationQType::IntScale)
    set_int_scale(quantize);

  return quantize;
}

// Create Dequantize Op whose shape is the same with node
luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
{
  auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
  dequantize->name(node->name() + "_Dequantize");
  dequantize->dtype(loco::DataType::FLOAT32);
  dequantize->rank(node->rank());
  for (uint32_t i = 0; i < node->rank(); i++)
    dequantize->dim(i).set(node->dim(i).value());

  dequantize->shape_status(luci::ShapeStatus::VALID);

  luci::add_origin(dequantize, luci::get_origin(node));

  return dequantize;
}

/**
 * Insert Quantize operator for mixed-precision quantization
 * 1. Before input feature map (only for non-const)
 * 2. After output feature map
 *
 * For example, if default_dtype = U8 and op_dtype = S16,
 * 1. Quantize (U8->S16) is inserted before ifm
 * 2. Quantize (S16->U8) is inserted after ofm
 *
 * Why not insert Quantize Op for const ifm?
 * We quantize const tensors in a single step to preserve precision.
 * For example, if default_dtype = U8, op_dtype = S16, and op is CONV2D,
 * we directly quantize the weights to 16 bits, rather than 8 bits and then 16 bits.
 */

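// Roughly, for default_dtype = U8 and op_dtype = S16 around a CONV2D:
//
//   ifm (U8) -> [Quantize U8->S16] -> CONV2D (S16) -> [Quantize S16->U8] -> next Op (U8)
//
// while CONV2D's const weights and bias are quantized to 16 bits directly.
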
struct InsertQuantizeOp final : public luci::CircleNodeMutableVisitor<void>
{
  InsertQuantizeOp(loco::DataType default_dtype, loco::DataType op_dtype)
    : _default_dtype(default_dtype), _op_dtype(op_dtype)
  {
    assert(default_dtype != op_dtype); // FIX_CALLER_UNLESS
  }

private:
  loco::DataType _default_dtype;
  loco::DataType _op_dtype;

  luci::CircleQuantize *create_in_quantize(loco::Node *in, loco::Node *origin)
  {
    auto input = loco::must_cast<luci::CircleNode *>(in);
    if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
      return nullptr;

    // input is not quantizable (ex: index)
    if (input->quantparam() == nullptr)
      return nullptr;

    auto input_quant = create_quantize_op(input, _op_dtype);
    input_quant->input(input);
    auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
    luci::add_origin(input_quant, luci::get_origin(origin_node));
    return input_quant;
  }

  void insert_out_quantize(loco::Node *node)
  {
    auto output = loco::must_cast<luci::CircleNode *>(node);
    assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS

    // output is not quantizable (ex: index)
    if (output->quantparam() == nullptr)
      return;

    auto output_quant = create_quantize_op(output, _default_dtype);

    luci::add_origin(output_quant, luci::get_origin(output));
    loco::replace(node).with(output_quant);
    output_quant->input(node);
  }

// INPUT_NAME is the only activation of NODE
#define INSERT_QUANTIZE_TO_UNARY_OP(NODE, INPUT_NAME)                      \
  void visit(NODE *node)                                                   \
  {                                                                        \
    if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node))   \
      node->INPUT_NAME(input_quant);                                       \
                                                                           \
    insert_out_quantize(node);                                             \
  }

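// For reference, INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x) expands to roughly:
//
//   void visit(luci::CircleAbs *node)
//   {
//     if (auto input_quant = create_in_quantize(node->x(), node))
//       node->x(input_quant);
//
//     insert_out_quantize(node);
//   }
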
// INPUT_NAME is the only activation of NODE
#define INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(NODE, INPUT_NAME, OUT_NAME) \
  void visit(NODE *node)                                                     \
  {                                                                          \
    if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node))     \
      node->INPUT_NAME(input_quant);                                         \
                                                                             \
    auto out_nodes = loco::succs(node);                                      \
    for (auto out_node : out_nodes)                                          \
    {                                                                        \
      auto out_circle = loco::must_cast<OUT_NAME *>(out_node);               \
      insert_out_quantize(out_circle);                                       \
    }                                                                        \
  }

// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
#define INSERT_QUANTIZE_TO_BINARY_OP(NODE, INPUT_NAME1, INPUT_NAME2)         \
  void visit(NODE *node)                                                     \
  {                                                                          \
    if (auto input1_quant = create_in_quantize(node->INPUT_NAME1(), node))   \
      node->INPUT_NAME1(input1_quant);                                       \
                                                                             \
    if (auto input2_quant = create_in_quantize(node->INPUT_NAME2(), node))   \
      node->INPUT_NAME2(input2_quant);                                       \
                                                                             \
    insert_out_quantize(node);                                               \
  }

  // Default behavior (NYI)
  void visit(luci::CircleNode *node)
  {
    throw std::runtime_error("Unsupported Op for mixed-precision quantization. Layer name: " +
                             node->name());
  }

  void visit(luci::CircleOutput *) {}
  void visit(luci::CircleSplitVOut *) {}
  void visit(luci::CircleSplitOut *) {}
  void visit(luci::CircleTopKV2Out *) {}
  void visit(luci::CircleUniqueOut *) {}
  void visit(luci::CircleUnpackOut *) {}

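  // NOTE The *Out nodes above are the virtual per-output nodes of multi-output Ops.
  // Quantize Ops for them are inserted when their parent Op (Split, TopKV2, ...) is
  // visited via INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP, so they are no-ops here.
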
  // Ops that receive a single activation as an input
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthToSpace, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthwiseConv2D, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleElu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleExp, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFloor, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGelu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceProd, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReverseSequence, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRsqrt, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSlice, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTile, input)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTranspose, a)
  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTransposeConv, outBackprop)

  // Ops that receive two activations as inputs
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleAdd, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleBatchMatMul, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleDiv, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleFloorDiv, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMaximum, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMinimum, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMul, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleOneHot, on_value, off_value)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CirclePow, x, y)
  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleSub, x, y)

  // Multiple-output ops that receive a single activation as input
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplit, input, luci::CircleSplitOut)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplitV, input, luci::CircleSplitVOut)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleTopKV2, input, luci::CircleTopKV2Out)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnique, input, luci::CircleUniqueOut)
  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnpack, value, luci::CircleUnpackOut)

  // AddN has arbitrary number of inputs
  void visit(luci::CircleAddN *node)
  {
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      if (auto input_quant = create_in_quantize(node->inputs(i), node))
        node->inputs(i, input_quant);
    }

    insert_out_quantize(node);
  }

  // Concat has arbitrary number of inputs
  void visit(luci::CircleConcatenation *node)
  {
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      if (auto input_quant = create_in_quantize(node->values(i), node))
        node->values(i, input_quant);
    }

    insert_out_quantize(node);
  }

  // Pack has arbitrary number of inputs
  void visit(luci::CirclePack *node)
  {
    auto arity = node->arity();
    for (uint32_t i = 0; i < arity; i++)
    {
      if (auto input_quant = create_in_quantize(node->values(i), node))
        node->values(i, input_quant);
    }

    insert_out_quantize(node);
  }

#undef INSERT_QUANTIZE_TO_UNARY_OP
#undef INSERT_QUANTIZE_TO_BINARY_OP
#undef INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP
};

} // namespace

void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
{
  auto inputs = g->inputs();

  assert(inputs); // FIX_CALLER_UNLESS
  assert(inputs->size() == _ctx->input_types.size()); // FIX_CALLER_UNLESS

  // NOTE loco::input_nodes returns input nodes following the order of InputIndex
  auto input_nodes = loco::input_nodes(g);
  for (uint32_t i = 0; i < input_nodes.size(); i++)
  {
    auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
    assert(i == input->index()); // Fix input_type logic

    const auto user_given_dtype = _ctx->input_types[i];

    if (input->dtype() == user_given_dtype)
      continue;

    // Bool type is not quantizable
    if (input->dtype() == loco::DataType::BOOL)
      continue;
    if (input->dtype() == loco::DataType::S32)
      continue;
    if (input->dtype() == loco::DataType::S64)
      continue;

    // Insert Quantize Op
    auto quant_op = create_quantize_op(input, input->dtype());
    loco::replace(input).with(quant_op);
    quant_op->input(input);
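    // After this rewrite (and the dtype updates below), the input path reads:
    //   CircleInput (user_given_dtype) -> Quantize -> original consumers
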
    // TODO Set a proper origin (Quantize should have its own Origin)
    {
      auto succs = loco::succs(quant_op);
      assert(succs.size() > 0);
      auto succ = loco::must_cast<luci::CircleNode *>(*succs.begin());
      luci::add_origin(quant_op, luci::get_origin(succ));
    }

    // Update qparam of input
    // This step is skipped if input_type is float32
    if (user_given_dtype != loco::DataType::FLOAT32)
    {
      auto quantparam = input->quantparam();
      assert(quantparam);
      assert(quantparam->min.size() == 1); // only support layer-wise quant
      assert(quantparam->max.size() == 1); // only support layer-wise quant
      auto min = quantparam->min[0];
      auto max = quantparam->max[0];

      float scaling_factor{0};
      int64_t zp{0};
      float nudged_min{0};
      float nudged_max{0};

      if (user_given_dtype == loco::DataType::U8)
      {
        compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
      }
      else
      {
        assert(user_given_dtype == loco::DataType::S16);
        compute_sym_scale(min, max, scaling_factor, nudged_min, nudged_max);
      }
      input->quantparam()->scale[0] = scaling_factor;
      input->quantparam()->zerop[0] = zp;
    }

    // Update dtype of input
    input->dtype(user_given_dtype);

    auto graph_input = inputs->at(input->index());
    graph_input->dtype(user_given_dtype);
  }
}

void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
{
  auto outputs = g->outputs();
  assert(outputs); // FIX_CALLER_UNLESS
  assert(outputs->size() == _ctx->output_types.size()); // Fix CircleQuantizer unless

  // NOTE loco::output_nodes returns output nodes following the order of OutputIndex
  auto output_nodes = loco::output_nodes(g);
  for (uint32_t i = 0; i < output_nodes.size(); i++)
  {
    auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
    assert(i == output->index()); // Fix output_type logic

    const auto user_given_dtype = _ctx->output_types[i];

    if (output->dtype() == user_given_dtype)
      continue;

    // Bool type is not quantizable
    if (output->dtype() == loco::DataType::BOOL)
      continue;

    auto from = loco::must_cast<luci::CircleNode *>(output->from());

    // The last Op is not quantizable (ex: ArgMax)
    if (not from->quantparam())
      continue;

    // Insert Dequantize Op for float32 output_type
    if (user_given_dtype == loco::DataType::FLOAT32)
    {
      auto dequant_op = create_dequantize(from);
      dequant_op->input(from);
      output->from(dequant_op);
    }
    else
    {
      // Insert Quantize Op for non-float32 output_type
      auto quant_op = create_quantize_op(from, user_given_dtype);
      quant_op->input(from);
      output->from(quant_op);

      // TODO Set a proper origin (Quantize should have its own Origin)
      luci::add_origin(quant_op, luci::get_origin(from));
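      // The output path now reads:
      //   from (quantized dtype) -> Quantize (user_given_dtype) -> CircleOutput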
    }

    // Update dtype of output
    output->dtype(user_given_dtype);

    auto graph_output = outputs->at(output->index());
    graph_output->dtype(user_given_dtype);
  }
}

/**
 * How does QuantizeWithMinMax work?
 *
 * We categorize tensors into four groups
 * - Activation: Feature maps (both Const/Non-const)
 * - Weights: Const tensors of specific Ops (Conv, FC, ...)
 * - Bias: Const tensors of specific Ops (Conv, FC, ...)
 * - Others: padding value, one_hot value, axis, ..
 *
 * Activation is quantized in different ways
 * 1. For non-constant activation, quantize using recorded min/max
 * 2. For constant activation, quantize using min/max of its value
 * 3. For some Ops (ex: pad_v2), output qparam is used as input qparam (backward propagation)
 * 4. For some Ops (ex: reshape), input qparam is used as output qparam (forward propagation)
 * 5. For some Ops (ex: tanh), output qparam has pre-defined values
 *
 * Weights are quantized using the min/max of their values
 *
 * Bias is quantized using input scale (s_i) and weights scale (s_w)
 * - Therefore, activation and weights should be quantized earlier than bias
 *
 * Overall Quantization Steps
 * 1. Quantize Activation
 *   - Quantize using recorded min/max (QuantizeActivation)
 *   - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
 *   - Remove redundant Quantize Ops (RemoveRedundantQuantizePass)
 *   - Propagate qparam backward (PropagateQParamBackwardPass)
 *   - Quantize const inputs (QuantizeConstInputActivation)
 *   - Quantize using pre-defined values (QuantizeSpecialActivation)
 *   - Propagate qparam forward (PropagateQParamForwardPass)
 * 2. Quantize Weights
 * 3. Quantize Bias
 * 4. Set input dtype
 * 5. Set output dtype
 *
 * Why was the quantization sequence determined as above?
 * - Activation and weights should be quantized before bias (1->2->3). Input/Output
 *   dtype can be updated at the end (4->5).
 * - During activation quantization,
 *   - Backward propagation is performed earlier than forward propagation. This allows
 *     backward-propagated qparam to be overwritten during forward propagation.
 *     We made this decision as Ops for forward propagation (reshape, transpose, ..)
 *     are more common than Ops for backward propagation. TODO Check this decision is safe.
 *   - QuantizeSpecialActivation is called before forward propagation to make sure that
 *     the pre-defined qparam values are propagated.
 */

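// A minimal usage sketch (field names follow the Context struct declared in
// QuantizeWithMinMaxPass.h, and a unique_ptr<Context> constructor is assumed;
// the values below are only an illustration):
//
//   auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
//   ctx->input_model_dtype = loco::DataType::FLOAT32;
//   ctx->output_model_dtype = loco::DataType::U8;
//   ctx->granularity = luci::QuantizationGranularity::ChannelWise;
//   ctx->input_types = {loco::DataType::FLOAT32};
//   ctx->output_types = {loco::DataType::FLOAT32};
//
//   luci::QuantizeWithMinMaxPass pass(std::move(ctx));
//   pass.run(g); // g: loco::Graph * whose activations carry recorded min/max
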
bool QuantizeWithMinMaxPass::run(loco::Graph *g)
{
  LOGGER(l);
  INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;

  auto info_by_name = layer_info_map(g, _ctx->layers_info);

  auto quantize_dtype = [&](const luci::CircleNode *node) {
    auto iter = info_by_name.find(node->name());

    // Return designated quantization dtype
    if (iter != info_by_name.end())
      return iter->second.dtype;

    // Return default quantization dtype
    return _ctx->output_model_dtype;
  };

  auto quantize_granularity = [&](const luci::CircleNode *node) {
    auto iter = info_by_name.find(node->name());

    // Return designated quantization granularity
    if (iter != info_by_name.end())
      return iter->second.granularity;

    // Return default quantization granularity
    return _ctx->granularity;
  };

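  // NOTE A layer listed in _ctx->layers_info overrides the global
  // output_model_dtype/granularity for that layer only; every other layer falls
  // back to the defaults returned above.
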
  // Quantize activation
  // Models can have inactive (unused) inputs.
  // We do not reject such models, but quantize them too
  for (auto node : loco::all_nodes(g))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
    circle_node->accept(&qa);
  }

  // Insert Quantize Op
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    auto op_dtype = quantize_dtype(circle_node);
    if (op_dtype != _ctx->output_model_dtype)
    {
      InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype);
      circle_node->accept(&iqo);
    }
  }

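  // NOTE InsertQuantizeOp may leave back-to-back Quantize Ops (e.g., when two
  // adjacent layers both run at a non-default dtype); the phase below removes them.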
  // Remove redundant Quantize Op
  {
    logo::Phase phase;

    phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());

    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.attach(&prog);
    phase_runner.run(phase);
  }

  // Backward propagation of activation qparam
  {
    PropagateQParamBackwardPass pqbp(_ctx->output_model_dtype);
    pqbp.run(g);
  }

  // Quantize const input activation
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeConstInputActivation qcia(quantize_dtype(circle_node));
    circle_node->accept(&qcia);
  }

  // Update qparam of output of special Ops
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);

    // At this point, all activations have to be quantized.
    // Un-quantized nodes are not the quantization target (ex: int32 tensor),
    // so they are skipped here.
    if (circle_node->quantparam() == nullptr)
      continue;

    QuantizeSpecialActivation qsa(_ctx->input_model_dtype, quantize_dtype(circle_node));
    circle_node->accept(&qsa);
  }

  // Forward propagation of activation qparam
  {
    logo::Phase phase;
    phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool));

    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.attach(&prog);
    phase_runner.run(phase);
  }

  // Quantize weights
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
                       quantize_granularity(circle_node));
    circle_node->accept(&qw);
  }

  // Quantize bias
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node),
                    quantize_granularity(circle_node));
    circle_node->accept(&qb);
  }

  // Update output dtype
  auto graph_outputs = g->outputs();
  for (auto node : loco::output_nodes(g))
  {
    auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _ctx->output_model_dtype)
    {
      circle_node->dtype(_ctx->output_model_dtype);
      auto graph_output = graph_outputs->at(circle_node->index());
      graph_output->dtype(_ctx->output_model_dtype);
    }
  }

  // Set input type
  set_input_type(g);

  // Set output type
  set_output_type(g);

  // Remove redundant Quantize Op
  {
    logo::Phase phase;

    phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());

    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
    phase_runner.attach(&prog);
    phase_runner.run(phase);
  }

  // Remove min/max values
  for (auto node : loco::active_nodes(loco::output_nodes(g)))
  {
    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
    if (auto qparam = circle_node->quantparam())
    {
      warn_accuracy_with_range(circle_node);
      qparam->min.clear();
      qparam->max.clear();
    }
  }

  INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
  return false; // one time run
}

} // namespace luci