compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "luci/Pass/QuantizeWithMinMaxPass.h"
  18 #include "QuantizationUtils.h"
  19
  20 #include <luci/IR/CircleNodes.h>
  21 #include <luci/IR/CircleNodeVisitor.h>
  22 #include <luci/Log.h>
  23
  24 #include <oops/UserExn.h>
  25
  26 #include <iostream>
  27 #include <cmath>
  28
  29 namespace luci
  30 {
  31
  32 namespace
  33 {
  34
  35 // Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
  36 // If true, return <input, weight> pair of the successor node (used to quantize bias)
  37 // If flase, return <nullptr, nullptr>
  38 std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
  39 {
  40   auto circle_const = dynamic_cast<CircleConst *>(node);
  41   if (circle_const == nullptr)
  42     return std::make_pair(nullptr, nullptr);
  43
  44   auto succs = loco::succs(node);
  45   if (succs.size() != 1) // assume bias is used by only one node
  46     return std::make_pair(nullptr, nullptr);
  47
  48   for (auto out : succs)
  49   {
  50     auto conv = dynamic_cast<CircleConv2D *>(out);
  51     if (conv != nullptr && conv->bias() == circle_const)
  52     {
  53       assert(conv->input() != nullptr);
  54       assert(conv->filter() != nullptr);
  55       return std::make_pair(conv->input(), conv->filter());
  56     }
  57     auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
  58     if (dw_conv != nullptr && dw_conv->bias() == circle_const)
  59     {
  60       assert(dw_conv->input() != nullptr);
  61       assert(dw_conv->filter() != nullptr);
  62       return std::make_pair(dw_conv->input(), dw_conv->filter());
  63     }
  64     auto fc = dynamic_cast<CircleFullyConnected *>(out);
  65     if (fc != nullptr && fc->bias() == circle_const)
  66     {
  67       assert(fc->input() != nullptr);
  68       assert(fc->weights() != nullptr);
  69       return std::make_pair(fc->input(), fc->weights());
  70     }
  71   }
  72   return std::make_pair(nullptr, nullptr);
  73 }
  74
  75 void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
  76                                float *scaling_factor, int64_t *zp)
  77 {
  78   float scale = input_scale * weight_scale;
  79   const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
  80
  81   uint32_t size = node->size<loco::DataType::FLOAT32>();
  82   std::vector<int32_t> quantized_values(size);
  83   for (uint32_t i = 0; i < size; ++i)
  84   {
  85     quantized_values[i] =
  86         static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
  87   }
  88
  89   node->dtype(loco::DataType::S32);      // change the type of tensor
  90   node->size<loco::DataType::S32>(size); // resize tensor
  91   const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
  92   const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
  93   for (uint32_t i = 0; i < size; ++i)
  94   {
  95     node->at<loco::DataType::S32>(i) =
  96         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  97   }
  98   *scaling_factor = scale;
  99   *zp = 0;
 100 }
 101
 102 void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
 103                             std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
 104 {
 105   float scaling_factor_inv{0};
 106
 107   uint32_t size = node->size<loco::DataType::FLOAT32>();
 108   std::vector<int32_t> quantized_values(size);
 109
 110   for (uint32_t i = 0; i < size; ++i)
 111   {
 112     scaling_factor[i] = input_scale * weight_scale[i];
 113     scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
 114     quantized_values[i] =
 115         static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
 116     zp[i] = 0;
 117   }
 118
 119   node->dtype(loco::DataType::S32);      // change the type of tensor
 120   node->size<loco::DataType::S32>(size); // resize tensor
 121   const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
 122   const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
 123   for (uint32_t i = 0; i < size; ++i)
 124   {
 125     node->at<loco::DataType::S32>(i) =
 126         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 127   }
 128 }
 129
 130 bool has_min_max(const CircleNode *node)
 131 {
 132   return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
 133 }
 134
 135 bool is_quantized(const CircleNode *node)
 136 {
 137   return node->dtype() == loco::DataType::U8 || // activation, weight
 138          node->dtype() == loco::DataType::S32;  // bias
 139 }
 140
 141 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
 142                             int32_t &channel_dim_index)
 143 {
 144   assert(node->dtype() == loco::DataType::FLOAT32);
 145
 146   const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
 147   const int32_t kMinScale = -kMaxScale;
 148
 149   uint32_t size = node->size<loco::DataType::FLOAT32>();
 150   std::vector<int32_t> quantized_values(size);
 151
 152   loco::TensorShape dimension;
 153   dimension.rank(4);
 154   uint32_t indices[4] = {
 155       0,
 156   };
 157
 158   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 159   {
 160     assert(false);
 161     return;
 162   }
 163
 164   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 165   {
 166     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 167     {
 168       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 169       {
 170         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 171         {
 172           int channel_idx = indices[channel_dim_index];
 173           const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
 174           auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 175           quantized_values[cal_offset(dimension, indices)] =
 176               static_cast<int32_t>(std::round(data * scaling_factor_inv));
 177         }
 178       }
 179     }
 180   }
 181
 182   node->dtype(loco::DataType::S16);      // change the type of tensor
 183   node->size<loco::DataType::S16>(size); // resize tensor
 184   for (uint32_t i = 0; i < size; ++i)
 185   {
 186     node->at<loco::DataType::S16>(i) =
 187         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 188   }
 189 }
 190
 191 void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
 192                              std::vector<float> &scaling_factor, int32_t &channel_dim_index)
 193 {
 194   assert(node->dtype() == loco::DataType::FLOAT32);
 195
 196   const int32_t kMinScale = 0;
 197   const int32_t kMaxScale = 255;
 198
 199   uint32_t size = node->size<loco::DataType::FLOAT32>();
 200   std::vector<int32_t> quantized_values(size);
 201
 202   loco::TensorShape dimension;
 203   dimension.rank(4);
 204   uint32_t indices[4] = {
 205       0,
 206   };
 207
 208   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 209   {
 210     assert(false);
 211     return;
 212   }
 213
 214   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 215   {
 216     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 217     {
 218       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 219       {
 220         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 221         {
 222           int channel_idx = indices[channel_dim_index];
 223           const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
 224           auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 225           quantized_values[cal_offset(dimension, indices)] =
 226               static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
 227         }
 228       }
 229     }
 230   }
 231
 232   node->dtype(loco::DataType::U8);      // change the type of tensor
 233   node->size<loco::DataType::U8>(size); // resize tensor
 234   for (uint32_t i = 0; i < size; ++i)
 235   {
 236     node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 237   }
 238 }
 239
 240 void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
 241 {
 242   const int32_t kMinScale = 0;
 243   const int32_t kMaxScale = 255;
 244
 245   uint32_t size = node->size<loco::DataType::FLOAT32>();
 246
 247   const float scaling_factor_inv = 1.0 / scaling_factor;
 248   std::vector<int32_t> quantized_values(size);
 249   for (uint32_t i = 0; i < size; ++i)
 250   {
 251     auto data = node->at<loco::DataType::FLOAT32>(i);
 252     quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
 253   }
 254
 255   node->dtype(loco::DataType::U8);      // change the type of tensor
 256   node->size<loco::DataType::U8>(size); // resize tensor
 257   for (uint32_t i = 0; i < size; ++i)
 258   {
 259     node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 260   }
 261 }
 262
 263 // Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
 264 bool is_weights(CircleNode *node)
 265 {
 266   auto circle_const = dynamic_cast<CircleConst *>(node);
 267   if (circle_const == nullptr)
 268     return false;
 269
 270   auto succs = loco::succs(node);
 271   if (succs.size() != 1) // assume weights is used by only one node
 272     return false;
 273
 274   for (auto out : succs)
 275   {
 276     auto conv = dynamic_cast<CircleConv2D *>(out);
 277     if (conv != nullptr && conv->filter() == circle_const)
 278       return true;
 279
 280     auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
 281     if (dw_conv != nullptr && dw_conv->filter() == circle_const)
 282       return true;
 283
 284     auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
 285     if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
 286       return true;
 287
 288     auto fc = dynamic_cast<CircleFullyConnected *>(out);
 289     if (fc != nullptr && fc->weights() == circle_const)
 290       return true;
 291   }
 292   return false;
 293 }
 294
 295 /**
 296  * @brief QuantizeActivation quantizes tensors for activations
 297  * @details Quantize using recorded min/max values
 298  */
 299 struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
 300 {
 301   QuantizeActivation(loco::DataType input, loco::DataType output)
 302       : input_type(input), output_type(output)
 303   {
 304   }
 305
 306   loco::DataType input_type;
 307   loco::DataType output_type;
 308
 309   // Quantize input tensors of each node
 310   bool visit(luci::CircleNode *node)
 311   {
 312     LOGGER(l);
 313     INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
 314     auto arity = node->arity();
 315     for (uint32_t i = 0; i < arity; i++)
 316     {
 317       auto input_node = node->arg(i);
 318       auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
 319
 320       // Check if this is already quantized
 321       if (is_quantized(circle_node))
 322         continue;
 323
 324       // Check if this is bias (bias is quantized later)
 325       auto iw = get_input_weight_of_bias(circle_node);
 326       if (iw.first != nullptr && iw.second != nullptr)
 327         continue;
 328
 329       // Check if this is activation
 330       // We assume min/max are recorded only for activations
 331       if (has_min_max(circle_node) && !is_weights(circle_node))
 332       {
 333         // Quantize using recorded min/max
 334         auto quantparam = circle_node->quantparam();
 335         assert(quantparam->min.size() == 1); // only support layer-wise quant
 336         assert(quantparam->max.size() == 1); // only support layer-wise quant
 337         auto min = quantparam->min[0];
 338         auto max = quantparam->max[0];
 339
 340         float scaling_factor{0};
 341         int64_t zp{0};
 342         float nudged_min{0};
 343         float nudged_max{0};
 344
 345         if (output_type == loco::DataType::U8)
 346         {
 347           compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
 348           circle_node->dtype(loco::DataType::U8);
 349         }
 350         else
 351         {
 352           compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
 353           circle_node->dtype(loco::DataType::S16);
 354         }
 355
 356         circle_node->quantparam()->min.clear();
 357         circle_node->quantparam()->max.clear();
 358         circle_node->quantparam()->scale.push_back(scaling_factor);
 359         circle_node->quantparam()->zerop.push_back(zp);
 360       }
 361     }
 362     return false;
 363   }
 364 };
 365
 366 struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
 367 {
 368   QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
 369       : input_type(input), output_type(output), granularity(gr)
 370   {
 371   }
 372
 373   loco::DataType input_type;
 374   loco::DataType output_type;
 375   QuantizationGranularity granularity;
 376
 377   // Quantize bias node
 378   bool visit(luci::CircleNode *node)
 379   {
 380     // Check if this is already quantized
 381     if (is_quantized(node))
 382       return false;
 383
 384     // Check if this is bias
 385     auto iw = get_input_weight_of_bias(node);
 386     if (iw.first == nullptr || iw.second == nullptr)
 387       return false;
 388
 389     auto input = loco::must_cast<luci::CircleNode *>(iw.first);
 390     auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
 391
 392     if (granularity == QuantizationGranularity::ChannelWise)
 393     {
 394       assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
 395       auto input_scale = input->quantparam()->scale[0];
 396
 397       assert(weight->quantparam() != nullptr); // weight scale's channel-wise
 398       auto weight_scale = weight->quantparam()->scale;
 399
 400       auto circle_const = loco::must_cast<luci::CircleConst *>(node);
 401
 402       uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
 403       assert(size == weight_scale.size());
 404       std::vector<float> scaling_factor(size);
 405       std::vector<int64_t> zp(size);
 406
 407       quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
 408
 409       auto quantparam = std::make_unique<CircleQuantParam>();
 410       quantparam->scale = scaling_factor;
 411       quantparam->zerop = zp;
 412       assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
 413       circle_const->quantparam(std::move(quantparam));
 414     }
 415     else
 416     {
 417       assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
 418       auto input_scale = input->quantparam()->scale[0];
 419
 420       assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
 421       auto weight_scale = weight->quantparam()->scale[0];
 422
 423       auto circle_const = loco::must_cast<luci::CircleConst *>(node);
 424       float scaling_factor{0};
 425       int64_t zp{0};
 426       asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
 427       auto quantparam = std::make_unique<CircleQuantParam>();
 428       quantparam->scale.push_back(scaling_factor);
 429       quantparam->zerop.push_back(zp);
 430       assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
 431       circle_const->quantparam(std::move(quantparam));
 432     }
 433     return false;
 434   }
 435 };
 436
 437 /**
 438  * @brief QuantizeWeights quantizes tensors for weights
 439  * @details Find min/max values on the fly and then quantize
 440  */
 441 struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
 442 {
 443   QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
 444       : input_type(input), output_type(output), granularity(gr)
 445   {
 446   }
 447
 448   loco::DataType input_type;
 449   loco::DataType output_type;
 450   QuantizationGranularity granularity;
 451
 452   // Quantize input tensors of each node
 453   bool visit(luci::CircleNode *node)
 454   {
 455     LOGGER(l);
 456     INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
 457     auto arity = node->arity();
 458     for (uint32_t i = 0; i < arity; i++)
 459     {
 460       auto input_node = node->arg(i);
 461       auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
 462
 463       // Check if this is already quantized
 464       if (is_quantized(circle_node))
 465         continue;
 466
 467       if (is_weights(circle_node))
 468       {
 469         auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
 470
 471         // Find min/max per channel-wise
 472         if (granularity == QuantizationGranularity::ChannelWise)
 473         {
 474           auto quantparam = circle_node->quantparam();
 475           if (quantparam == nullptr)
 476           {
 477             assert(false && "quantparam is nullptr");
 478             return false;
 479           }
 480
 481           auto min = quantparam->min;
 482           auto scaling_factor = quantparam->scale;
 483           int32_t channel_dim_index = 0;
 484
 485           if (output_type == loco::DataType::U8)
 486           {
 487             asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
 488           }
 489           else
 490           {
 491             sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
 492           }
 493           quantparam->min.clear();
 494           quantparam->max.clear();
 495           quantparam->quantized_dimension = channel_dim_index;
 496         }
 497         // Find min/max per layer-wise
 498         else
 499         {
 500           // Quantize using recorded quantparam
 501           auto quantparam = circle_node->quantparam();
 502           assert(quantparam != nullptr);
 503           assert(quantparam->min.size() == 1);   // only support layer-wise quant
 504           assert(quantparam->scale.size() == 1); // only support layer-wise quant
 505           auto min = quantparam->min[0];
 506           auto scaling_factor = quantparam->scale[0];
 507           asym_wquant_per_layer(circle_const, min, scaling_factor);
 508           quantparam->min.clear();
 509           quantparam->max.clear();
 510         }
 511       }
 512     }
 513     return false;
 514   }
 515 };
 516
 517 } // namespace
 518
 519 bool QuantizeWithMinMaxPass::run(loco::Graph *g)
 520 {
 521   LOGGER(l);
 522   INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
 523
 524   // Quantize activation
 525   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 526   {
 527     QuantizeActivation qa(_input_dtype, _output_dtype);
 528     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 529     circle_node->accept(&qa);
 530   }
 531
 532   // Quantize weights
 533   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 534   {
 535     QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
 536     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 537     circle_node->accept(&qw);
 538   }
 539
 540   // Quantize bias
 541   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 542   {
 543     QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
 544     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 545     circle_node->accept(&qb);
 546   }
 547
 548   // Update output dtype
 549   auto graph_outputs = g->outputs();
 550   for (auto node : loco::output_nodes(g))
 551   {
 552     auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
 553     if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
 554     {
 555       circle_node->dtype(_output_dtype);
 556       auto graph_output = graph_outputs->at(circle_node->index());
 557       graph_output->dtype(_output_dtype);
 558     }
 559   }
 560
 561   INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
 562   return false; // one time run
 563 }
 564
 565 } // namespace luci