compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "luci/Pass/QuantizeWithMinMaxPass.h"
  18 #include "QuantizationUtils.h"
  19
  20 #include <luci/IR/CircleNodes.h>
  21 #include <luci/IR/CircleNodeVisitor.h>
  22 #include <luci/Log.h>
  23
  24 #include <oops/UserExn.h>
  25
  26 #include <iostream>
  27 #include <cmath>
  28
  29 namespace luci
  30 {
  31
  32 namespace
  33 {
  34
  35 void overwrite_quantparam(luci::CircleConcatenation *concat, luci::CircleNode *target)
  36 {
  37   auto concat_qparam = concat->quantparam();
  38   if (concat_qparam == nullptr)
  39     throw std::runtime_error("quantparam of concat is not found during overwrite");
  40
  41   auto target_qparam = target->quantparam();
  42   if (target_qparam == nullptr)
  43   {
  44     auto quantparam = std::make_unique<CircleQuantParam>();
  45     target->quantparam(std::move(quantparam));
  46     target_qparam = target->quantparam();
  47   }
  48   target_qparam->min = concat_qparam->min;
  49   target_qparam->max = concat_qparam->max;
  50   target_qparam->scale = concat_qparam->scale;
  51   target_qparam->zerop = concat_qparam->zerop;
  52   target_qparam->quantized_dimension = concat_qparam->quantized_dimension;
  53 }
  54
  55 void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
  56                         loco::DataType quant_type)
  57 {
  58   uint32_t size = const_node->size<loco::DataType::FLOAT32>();
  59
  60   const float scaling_factor_inv = 1.0 / scaling_factor;
  61   std::vector<int32_t> quantized_values(size);
  62   for (uint32_t i = 0; i < size; ++i)
  63   {
  64     auto data = const_node->at<loco::DataType::FLOAT32>(i);
  65     quantized_values[i] = static_cast<int32_t>(std::round(data * scaling_factor_inv) + zerop);
  66   }
  67
  68   switch (quant_type)
  69   {
  70     case loco::DataType::U8:
  71       const_node->dtype(loco::DataType::U8);      // change the type of tensor
  72       const_node->size<loco::DataType::U8>(size); // resize tensor
  73       for (uint32_t i = 0; i < size; ++i)
  74         const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
  75       break;
  76     case loco::DataType::S16:
  77       assert(zerop == 0);
  78       const_node->dtype(loco::DataType::S16);      // change the type of tensor
  79       const_node->size<loco::DataType::S16>(size); // resize tensor
  80       for (uint32_t i = 0; i < size; ++i)
  81         const_node->at<loco::DataType::S16>(i) =
  82             std::min(32767, std::max(-32767, quantized_values[i]));
  83       break;
  84     default:
  85       throw std::runtime_error("Unsupported data type");
  86   }
  87 }
  88
  89 void quant_const(CircleConst *node, loco::DataType quant_type)
  90 {
  91   assert(node->dtype() == loco::DataType::FLOAT32);
  92
  93   float min = std::numeric_limits<float>::max();
  94   float max = std::numeric_limits<float>::lowest();
  95   for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
  96   {
  97     auto data = node->at<loco::DataType::FLOAT32>(i);
  98     min = data < min ? data : min;
  99     max = data > max ? data : max;
 100   }
 101
 102   float scaling_factor{0.0};
 103   int64_t zp{0};
 104   float nudged_min{0.0};
 105   float nudged_max{0.0};
 106
 107   switch (quant_type)
 108   {
 109     case loco::DataType::U8:
 110       asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
 111                                               nudged_max);
 112       break;
 113     case loco::DataType::S16:
 114       symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
 115                                              nudged_max);
 116       break;
 117     default:
 118       throw std::runtime_error("Unsupported data type");
 119   }
 120
 121   auto quantparam = std::make_unique<CircleQuantParam>();
 122   quantparam->scale.push_back(scaling_factor);
 123   quantparam->zerop.push_back(zp);
 124   node->quantparam(std::move(quantparam));
 125 }
 126
 127 // Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer
 128 // If true, return <input, weight> pair of the successor node (used to quantize bias)
 129 // If flase, return <nullptr, nullptr>
 130 std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
 131 {
 132   auto circle_const = dynamic_cast<CircleConst *>(node);
 133   if (circle_const == nullptr)
 134     return std::make_pair(nullptr, nullptr);
 135
 136   auto succs = loco::succs(node);
 137   if (succs.size() != 1) // assume bias is used by only one node
 138     return std::make_pair(nullptr, nullptr);
 139
 140   for (auto out : succs)
 141   {
 142     auto conv = dynamic_cast<CircleConv2D *>(out);
 143     if (conv != nullptr && conv->bias() == circle_const)
 144     {
 145       assert(conv->input() != nullptr);
 146       assert(conv->filter() != nullptr);
 147       return std::make_pair(conv->input(), conv->filter());
 148     }
 149     auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
 150     if (dw_conv != nullptr && dw_conv->bias() == circle_const)
 151     {
 152       assert(dw_conv->input() != nullptr);
 153       assert(dw_conv->filter() != nullptr);
 154       return std::make_pair(dw_conv->input(), dw_conv->filter());
 155     }
 156     auto fc = dynamic_cast<CircleFullyConnected *>(out);
 157     if (fc != nullptr && fc->bias() == circle_const)
 158     {
 159       assert(fc->input() != nullptr);
 160       assert(fc->weights() != nullptr);
 161       return std::make_pair(fc->input(), fc->weights());
 162     }
 163     auto tconv = dynamic_cast<CircleTransposeConv *>(out);
 164     if (tconv != nullptr && tconv->bias() == circle_const)
 165     {
 166       assert(tconv->outBackprop() != nullptr);
 167       assert(tconv->filter() != nullptr);
 168       return std::make_pair(tconv->outBackprop(), tconv->filter());
 169     }
 170   }
 171   return std::make_pair(nullptr, nullptr);
 172 }
 173
 174 void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
 175                                float *scaling_factor, int64_t *zp)
 176 {
 177   float scale = input_scale * weight_scale;
 178   const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
 179
 180   uint32_t size = node->size<loco::DataType::FLOAT32>();
 181   std::vector<int32_t> quantized_values(size);
 182   for (uint32_t i = 0; i < size; ++i)
 183   {
 184     quantized_values[i] =
 185         static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
 186   }
 187
 188   node->dtype(loco::DataType::S32);      // change the type of tensor
 189   node->size<loco::DataType::S32>(size); // resize tensor
 190   const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
 191   const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
 192   for (uint32_t i = 0; i < size; ++i)
 193   {
 194     node->at<loco::DataType::S32>(i) =
 195         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 196   }
 197   *scaling_factor = scale;
 198   *zp = 0;
 199 }
 200
 201 void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
 202                             std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
 203 {
 204   float scaling_factor_inv{0};
 205
 206   uint32_t size = node->size<loco::DataType::FLOAT32>();
 207   std::vector<int32_t> quantized_values(size);
 208
 209   for (uint32_t i = 0; i < size; ++i)
 210   {
 211     scaling_factor[i] = input_scale * weight_scale[i];
 212     scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
 213     quantized_values[i] =
 214         static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
 215     zp[i] = 0;
 216   }
 217
 218   node->dtype(loco::DataType::S32);      // change the type of tensor
 219   node->size<loco::DataType::S32>(size); // resize tensor
 220   const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
 221   const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
 222   for (uint32_t i = 0; i < size; ++i)
 223   {
 224     node->at<loco::DataType::S32>(i) =
 225         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 226   }
 227 }
 228
 229 void int16_quant_bias_per_channel(CircleConst *node, float input_scale,
 230                                   std::vector<float> &weight_scale,
 231                                   std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
 232 {
 233   float scaling_factor_inv{0};
 234
 235   uint32_t size = node->size<loco::DataType::FLOAT32>();
 236   std::vector<int64_t> quantized_values(size);
 237
 238   for (uint32_t i = 0; i < size; ++i)
 239   {
 240     scaling_factor[i] = input_scale * weight_scale[i];
 241     scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
 242     quantized_values[i] =
 243         static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
 244     zp[i] = 0;
 245   }
 246
 247   node->dtype(loco::DataType::S64);      // change the type of tensor
 248   node->size<loco::DataType::S64>(size); // resize tensor
 249   for (uint32_t i = 0; i < size; ++i)
 250   {
 251     node->at<loco::DataType::S64>(i) = quantized_values[i];
 252   }
 253 }
 254
 255 bool has_min_max(const CircleNode *node)
 256 {
 257   return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
 258 }
 259
 260 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
 261                             int32_t &channel_dim_index)
 262 {
 263   assert(node->dtype() == loco::DataType::FLOAT32);
 264
 265   const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
 266   const int32_t kMinScale = -kMaxScale;
 267
 268   uint32_t size = node->size<loco::DataType::FLOAT32>();
 269   std::vector<int32_t> quantized_values(size);
 270
 271   loco::TensorShape dimension;
 272   dimension.rank(4);
 273   uint32_t indices[4] = {
 274       0,
 275   };
 276
 277   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 278   {
 279     assert(false);
 280     return;
 281   }
 282
 283   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 284   {
 285     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 286     {
 287       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 288       {
 289         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 290         {
 291           int channel_idx = indices[channel_dim_index];
 292           const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
 293           auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 294           quantized_values[cal_offset(dimension, indices)] =
 295               static_cast<int32_t>(std::round(data * scaling_factor_inv));
 296         }
 297       }
 298     }
 299   }
 300
 301   node->dtype(loco::DataType::S16);      // change the type of tensor
 302   node->size<loco::DataType::S16>(size); // resize tensor
 303   for (uint32_t i = 0; i < size; ++i)
 304   {
 305     node->at<loco::DataType::S16>(i) =
 306         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 307   }
 308 }
 309
 310 void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
 311                              std::vector<float> &scaling_factor, int32_t &channel_dim_index)
 312 {
 313   assert(node->dtype() == loco::DataType::FLOAT32);
 314
 315   const int32_t kMinScale = 0;
 316   const int32_t kMaxScale = 255;
 317
 318   uint32_t size = node->size<loco::DataType::FLOAT32>();
 319   std::vector<int32_t> quantized_values(size);
 320
 321   loco::TensorShape dimension;
 322   dimension.rank(4);
 323   uint32_t indices[4] = {
 324       0,
 325   };
 326
 327   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 328   {
 329     assert(false);
 330     return;
 331   }
 332
 333   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 334   {
 335     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 336     {
 337       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 338       {
 339         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 340         {
 341           int channel_idx = indices[channel_dim_index];
 342           const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
 343           auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 344           quantized_values[cal_offset(dimension, indices)] =
 345               static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
 346         }
 347       }
 348     }
 349   }
 350
 351   node->dtype(loco::DataType::U8);      // change the type of tensor
 352   node->size<loco::DataType::U8>(size); // resize tensor
 353   for (uint32_t i = 0; i < size; ++i)
 354   {
 355     node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 356   }
 357 }
 358
 359 void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
 360 {
 361   const int32_t kMinScale = 0;
 362   const int32_t kMaxScale = 255;
 363
 364   uint32_t size = node->size<loco::DataType::FLOAT32>();
 365
 366   const float scaling_factor_inv = 1.0 / scaling_factor;
 367   std::vector<int32_t> quantized_values(size);
 368   for (uint32_t i = 0; i < size; ++i)
 369   {
 370     auto data = node->at<loco::DataType::FLOAT32>(i);
 371     quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
 372   }
 373
 374   node->dtype(loco::DataType::U8);      // change the type of tensor
 375   node->size<loco::DataType::U8>(size); // resize tensor
 376   for (uint32_t i = 0; i < size; ++i)
 377   {
 378     node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 379   }
 380 }
 381
 382 /**
 383  * @brief QuantizeActivation quantizes tensors for activations
 384  * @details Quantize using recorded min/max values
 385  */
 386 struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
 387 {
 388   QuantizeActivation(loco::DataType input, loco::DataType output)
 389       : input_type(input), output_type(output)
 390   {
 391   }
 392
 393   loco::DataType input_type;
 394   loco::DataType output_type;
 395
 396   // Quantize input tensors of each node
 397   bool visit(luci::CircleNode *node)
 398   {
 399     LOGGER(l);
 400     INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
 401     auto arity = node->arity();
 402     for (uint32_t i = 0; i < arity; i++)
 403     {
 404       auto input_node = node->arg(i);
 405       auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
 406
 407       // Check if this is already quantized
 408       if (is_quantized(circle_node))
 409         continue;
 410
 411       // Check if this is bias (bias is quantized later)
 412       auto iw = get_input_weight_of_bias(circle_node);
 413       if (iw.first != nullptr && iw.second != nullptr)
 414         continue;
 415
 416       // Check if this is activation
 417       // We assume min/max are recorded only for activations
 418       if (has_min_max(circle_node) && !is_weights(circle_node))
 419       {
 420         // Quantize using recorded min/max
 421         auto quantparam = circle_node->quantparam();
 422         assert(quantparam->min.size() == 1); // only support layer-wise quant
 423         assert(quantparam->max.size() == 1); // only support layer-wise quant
 424         auto min = quantparam->min[0];
 425         auto max = quantparam->max[0];
 426
 427         float scaling_factor{0};
 428         int64_t zp{0};
 429         float nudged_min{0};
 430         float nudged_max{0};
 431
 432         if (output_type == loco::DataType::U8)
 433         {
 434           compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
 435           circle_node->dtype(loco::DataType::U8);
 436         }
 437         else
 438         {
 439           compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
 440           circle_node->dtype(loco::DataType::S16);
 441         }
 442
 443         circle_node->quantparam()->min.clear();
 444         circle_node->quantparam()->max.clear();
 445         circle_node->quantparam()->scale.push_back(scaling_factor);
 446         circle_node->quantparam()->zerop.push_back(zp);
 447       }
 448     }
 449     return false;
 450   }
 451 };
 452
 453 struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
 454 {
 455   QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
 456       : input_type(input), output_type(output), granularity(gr)
 457   {
 458   }
 459
 460   loco::DataType input_type;
 461   loco::DataType output_type;
 462   QuantizationGranularity granularity;
 463
 464   // Quantize bias node
 465   bool visit(luci::CircleNode *node)
 466   {
 467     // Check if this is already quantized
 468     if (is_quantized(node))
 469       return false;
 470
 471     // Check if this is bias
 472     auto iw = get_input_weight_of_bias(node);
 473     if (iw.first == nullptr || iw.second == nullptr)
 474       return false;
 475
 476     auto input = loco::must_cast<luci::CircleNode *>(iw.first);
 477     auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
 478
 479     if (granularity == QuantizationGranularity::ChannelWise)
 480     {
 481       assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
 482       auto input_scale = input->quantparam()->scale[0];
 483
 484       assert(weight->quantparam() != nullptr); // weight scale's channel-wise
 485       auto weight_scale = weight->quantparam()->scale;
 486
 487       auto circle_const = loco::must_cast<luci::CircleConst *>(node);
 488
 489       uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
 490       assert(size == weight_scale.size());
 491       std::vector<float> scaling_factor(size);
 492       std::vector<int64_t> zp(size);
 493
 494       if (output_type == loco::DataType::U8)
 495       {
 496         quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
 497       }
 498       else if (output_type == loco::DataType::S16)
 499       {
 500         int16_quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
 501       }
 502       else
 503       {
 504         throw std::runtime_error("Unsupported quantization type.");
 505       }
 506
 507       auto quantparam = std::make_unique<CircleQuantParam>();
 508       quantparam->scale = scaling_factor;
 509       quantparam->zerop = zp;
 510       assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
 511       circle_const->quantparam(std::move(quantparam));
 512     }
 513     else
 514     {
 515       assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
 516       auto input_scale = input->quantparam()->scale[0];
 517
 518       assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
 519       auto weight_scale = weight->quantparam()->scale[0];
 520
 521       auto circle_const = loco::must_cast<luci::CircleConst *>(node);
 522       float scaling_factor{0};
 523       int64_t zp{0};
 524       asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
 525       auto quantparam = std::make_unique<CircleQuantParam>();
 526       quantparam->scale.push_back(scaling_factor);
 527       quantparam->zerop.push_back(zp);
 528       assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
 529       circle_const->quantparam(std::move(quantparam));
 530     }
 531     return false;
 532   }
 533 };
 534
 535 /**
 536  * @brief QuantizeWeights quantizes tensors for weights
 537  * @details Find min/max values on the fly and then quantize
 538  */
 539 struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
 540 {
 541   QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
 542       : input_type(input), output_type(output), granularity(gr)
 543   {
 544   }
 545
 546   loco::DataType input_type;
 547   loco::DataType output_type;
 548   QuantizationGranularity granularity;
 549
 550   // Quantize input tensors of each node
 551   bool visit(luci::CircleNode *node)
 552   {
 553     LOGGER(l);
 554     INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
 555     auto arity = node->arity();
 556     for (uint32_t i = 0; i < arity; i++)
 557     {
 558       auto input_node = node->arg(i);
 559       auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
 560
 561       // Check if this is already quantized
 562       if (is_quantized(circle_node))
 563         continue;
 564
 565       if (is_weights(circle_node))
 566       {
 567         auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
 568
 569         // Find min/max per channel-wise
 570         if (granularity == QuantizationGranularity::ChannelWise)
 571         {
 572           auto quantparam = circle_node->quantparam();
 573           if (quantparam == nullptr)
 574           {
 575             assert(false && "quantparam is nullptr");
 576             return false;
 577           }
 578
 579           auto min = quantparam->min;
 580           auto scaling_factor = quantparam->scale;
 581           int32_t channel_dim_index = 0;
 582
 583           if (output_type == loco::DataType::U8)
 584           {
 585             asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
 586           }
 587           else
 588           {
 589             sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
 590           }
 591           quantparam->min.clear();
 592           quantparam->max.clear();
 593           quantparam->quantized_dimension = channel_dim_index;
 594         }
 595         // Find min/max per layer-wise
 596         else
 597         {
 598           // Quantize using recorded quantparam
 599           auto quantparam = circle_node->quantparam();
 600           assert(quantparam != nullptr);
 601           assert(quantparam->min.size() == 1);   // only support layer-wise quant
 602           assert(quantparam->scale.size() == 1); // only support layer-wise quant
 603           auto min = quantparam->min[0];
 604           auto scaling_factor = quantparam->scale[0];
 605           asym_wquant_per_layer(circle_const, min, scaling_factor);
 606           quantparam->min.clear();
 607           quantparam->max.clear();
 608         }
 609       }
 610     }
 611     return false;
 612   }
 613 };
 614
 615 /**
 616  * @brief Quantize const input tensors using min/max of const values
 617  */
 618 void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
 619 {
 620   auto opcode = node->opcode();
 621   auto arity = node->arity();
 622
 623   loco::Node *input_node{nullptr};
 624   luci::CircleConst *const_node{nullptr};
 625
 626   switch (opcode)
 627   {
 628     case luci::CircleOpcode::CONV_2D:
 629     case luci::CircleOpcode::DEPTHWISE_CONV_2D:
 630     case luci::CircleOpcode::FULLY_CONNECTED:
 631     case luci::CircleOpcode::TRANSPOSE_CONV:
 632       // Handled in QuantizeWeights and QuantizeBias
 633       break;
 634
 635     case luci::CircleOpcode::CONCATENATION:
 636       // Handled in propagate_concat_quantparam
 637       break;
 638
 639     case luci::CircleOpcode::ARG_MAX:
 640     case luci::CircleOpcode::ARG_MIN:
 641     case luci::CircleOpcode::MEAN:
 642     case luci::CircleOpcode::PAD:
 643     case luci::CircleOpcode::REDUCE_ANY:
 644     case luci::CircleOpcode::REDUCE_PROD:
 645     case luci::CircleOpcode::REDUCE_MAX:
 646     case luci::CircleOpcode::REDUCE_MIN:
 647     case luci::CircleOpcode::RESHAPE:
 648     case luci::CircleOpcode::RESIZE_BILINEAR:
 649     case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR:
 650     case luci::CircleOpcode::REVERSE_SEQUENCE:
 651     case luci::CircleOpcode::SUM:
 652     case luci::CircleOpcode::TILE:
 653     case luci::CircleOpcode::TOPK_V2:
 654     case luci::CircleOpcode::TRANSPOSE:
 655       // The second input of these Ops should not be quantized
 656       // Ex: axis, paddings
 657       input_node = node->arg(0);
 658       const_node = dynamic_cast<luci::CircleConst *>(input_node);
 659       if (const_node != nullptr)
 660         quant_const(const_node, output_type);
 661       break;
 662
 663     case luci::CircleOpcode::ADD:
 664     case luci::CircleOpcode::ADD_N:
 665     case luci::CircleOpcode::DIV:
 666     case luci::CircleOpcode::EQUAL:
 667     case luci::CircleOpcode::GREATER:
 668     case luci::CircleOpcode::GREATER_EQUAL:
 669     case luci::CircleOpcode::INSTANCE_NORM:
 670     case luci::CircleOpcode::LESS:
 671     case luci::CircleOpcode::LESS_EQUAL:
 672     case luci::CircleOpcode::MAXIMUM:
 673     case luci::CircleOpcode::MINIMUM:
 674     case luci::CircleOpcode::MUL:
 675     case luci::CircleOpcode::NOT_EQUAL:
 676     case luci::CircleOpcode::PRELU:
 677     case luci::CircleOpcode::SUB:
 678       // Quantize all const inputs using their values
 679       for (uint32_t i = 0; i < arity; i++)
 680       {
 681         input_node = node->arg(i);
 682         const_node = dynamic_cast<luci::CircleConst *>(input_node);
 683         if (const_node != nullptr)
 684           quant_const(const_node, output_type);
 685       }
 686       break;
 687
 688     default:
 689       for (uint32_t i = 0; i < arity; i++)
 690       {
 691         input_node = node->arg(i);
 692         const_node = dynamic_cast<luci::CircleConst *>(input_node);
 693         if (const_node != nullptr)
 694           throw std::runtime_error("Unsupported Op for const inputs");
 695       }
 696       break;
 697   }
 698 }
 699
 700 } // namespace
 701
 702 /** BEFORE
 703  *
 704  *         [CircleNode]             [CircleConst]
 705  *         (U8 qparam1)                 (FP32)
 706  *                   \                    /
 707  *                    \                  /
 708  *                    [CircleConcatenation]
 709  *                        (U8 qparam2)
 710  *
 711  *  AFTER
 712  *         [CircleNode]             [CircleConst]
 713  *         (U8 qparam2)             (U8 qparam2)
 714  *                   \                    /
 715  *                    \                  /
 716  *                    [CircleConcatenation]
 717  *                        (U8 qparam2)
 718  */
 719 void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type)
 720 {
 721   assert(concat->quantparam() != nullptr);
 722
 723   const auto num_inputs = concat->numValues();
 724
 725   // Quantize const inputs using their values if concat has fused act function
 726   if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
 727   {
 728     for (uint32_t i = 0; i < num_inputs; i++)
 729     {
 730       auto node = concat->arg(i);
 731       auto const_node = dynamic_cast<luci::CircleConst *>(node);
 732       if (const_node != nullptr)
 733         quant_const(const_node, quant_type);
 734     }
 735     return;
 736   }
 737
 738   for (uint32_t i = 0; i < num_inputs; i++)
 739   {
 740     auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));
 741
 742     // Skip if this input is CONCAT Op
 743     if (node->opcode() == luci::CircleOpcode::CONCATENATION)
 744       continue;
 745
 746     // Skip if this input is used by other Ops
 747     auto succs = loco::succs(node);
 748     if (succs.size() != 1)
 749     {
 750       if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
 751       {
 752         luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
 753         quant_const(const_node, quant_type);
 754       }
 755       continue;
 756     }
 757
 758     assert(succs.find(concat) != succs.end());
 759
 760     // Quantize constant values
 761     if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
 762     {
 763       luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
 764       if (const_node->dtype() != loco::DataType::FLOAT32)
 765         throw std::runtime_error("Unsupported data type for constant input of concatenation Op");
 766
 767       const auto concat_qparam = concat->quantparam();
 768       if (concat_qparam == nullptr)
 769         throw std::runtime_error("quantparam of concat is not found during propagation");
 770
 771       assert(concat_qparam->scale.size() == 1);
 772       const auto scaling_factor = concat_qparam->scale[0];
 773       const auto zerop = concat_qparam->zerop[0];
 774
 775       quant_const_values(const_node, scaling_factor, zerop, quant_type);
 776     }
 777     else
 778     {
 779       // Non-const input must have been quantized
 780       assert(node->quantparam() != nullptr);
 781     }
 782
 783     overwrite_quantparam(concat, node);
 784   }
 785 }
 786
 787 bool QuantizeWithMinMaxPass::run(loco::Graph *g)
 788 {
 789   LOGGER(l);
 790   INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
 791
 792   // Quantize activation
 793   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 794   {
 795     QuantizeActivation qa(_input_dtype, _output_dtype);
 796     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 797     circle_node->accept(&qa);
 798   }
 799
 800   // Quantize weights
 801   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 802   {
 803     QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
 804     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 805     circle_node->accept(&qw);
 806   }
 807
 808   // Quantize bias
 809   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 810   {
 811     QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
 812     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 813     circle_node->accept(&qb);
 814   }
 815
 816   // Quantize const inputs other than weights and bias
 817   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 818   {
 819     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 820     quantize_const_inputs(circle_node, _output_dtype);
 821   }
 822
 823   // Propagate quantization parameters of concat Op
 824   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 825   {
 826     auto concat = dynamic_cast<luci::CircleConcatenation *>(node);
 827     if (not concat)
 828       continue;
 829
 830     // Propagate qparam of concat to its inputs if
 831     // (1) concat is uint8-quantized
 832     // (2) concat has no fused activation function
 833     // (3) the input is not concatenation Op
 834     // (4) the input is not produced to Ops other than concat
 835     propagate_concat_quantparam(concat, _output_dtype);
 836   }
 837
 838   // Update output dtype
 839   auto graph_outputs = g->outputs();
 840   for (auto node : loco::output_nodes(g))
 841   {
 842     auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
 843     if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
 844     {
 845       circle_node->dtype(_output_dtype);
 846       auto graph_output = graph_outputs->at(circle_node->index());
 847       graph_output->dtype(_output_dtype);
 848     }
 849   }
 850
 851   INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
 852   return false; // one time run
 853 }
 854
 855 } // namespace luci