compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "luci/Pass/QuantizeWithMinMaxPass.h"
  18 #include "QuantizationUtils.h"
  19
  20 #include <luci/IR/CircleNodes.h>
  21 #include <luci/IR/CircleNodeVisitor.h>
  22 #include <luci/Log.h>
  23
  24 #include <oops/UserExn.h>
  25
  26 #include <iostream>
  27 #include <cmath>
  28
  29 namespace luci
  30 {
  31
  32 namespace
  33 {
  34
  35 // Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
  36 // If true, return <input, weight> pair of the successor node (used to quantize bias)
  37 // If flase, return <nullptr, nullptr>
  38 std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
  39 {
  40   auto circle_const = dynamic_cast<CircleConst *>(node);
  41   if (circle_const == nullptr)
  42     return std::make_pair(nullptr, nullptr);
  43
  44   auto succs = loco::succs(node);
  45   if (succs.size() != 1) // assume bias is used by only one node
  46     return std::make_pair(nullptr, nullptr);
  47
  48   for (auto out : succs)
  49   {
  50     auto conv = dynamic_cast<CircleConv2D *>(out);
  51     if (conv != nullptr && conv->bias() == circle_const)
  52     {
  53       assert(conv->input() != nullptr);
  54       assert(conv->filter() != nullptr);
  55       return std::make_pair(conv->input(), conv->filter());
  56     }
  57     auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
  58     if (dw_conv != nullptr && dw_conv->bias() == circle_const)
  59     {
  60       assert(dw_conv->input() != nullptr);
  61       assert(dw_conv->filter() != nullptr);
  62       return std::make_pair(dw_conv->input(), dw_conv->filter());
  63     }
  64     auto fc = dynamic_cast<CircleFullyConnected *>(out);
  65     if (fc != nullptr && fc->bias() == circle_const)
  66     {
  67       assert(fc->input() != nullptr);
  68       assert(fc->weights() != nullptr);
  69       return std::make_pair(fc->input(), fc->weights());
  70     }
  71   }
  72   return std::make_pair(nullptr, nullptr);
  73 }
  74
  75 void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
  76                                float *scaling_factor, int64_t *zp)
  77 {
  78   float scale = input_scale * weight_scale;
  79   const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
  80
  81   uint32_t size = node->size<loco::DataType::FLOAT32>();
  82   std::vector<int32_t> quantized_values(size);
  83   for (uint32_t i = 0; i < size; ++i)
  84   {
  85     quantized_values[i] =
  86         static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
  87   }
  88
  89   node->dtype(loco::DataType::S32);      // change the type of tensor
  90   node->size<loco::DataType::S32>(size); // resize tensor
  91   const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
  92   const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
  93   for (uint32_t i = 0; i < size; ++i)
  94   {
  95     node->at<loco::DataType::S32>(i) =
  96         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
  97   }
  98   *scaling_factor = scale;
  99   *zp = 0;
 100 }
 101
 102 void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
 103                             std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
 104 {
 105   float scaling_factor_inv{0};
 106
 107   uint32_t size = node->size<loco::DataType::FLOAT32>();
 108   std::vector<int32_t> quantized_values(size);
 109
 110   for (uint32_t i = 0; i < size; ++i)
 111   {
 112     scaling_factor[i] = input_scale * weight_scale[i];
 113     scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
 114     quantized_values[i] =
 115         static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
 116     zp[i] = 0;
 117   }
 118
 119   node->dtype(loco::DataType::S32);      // change the type of tensor
 120   node->size<loco::DataType::S32>(size); // resize tensor
 121   const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
 122   const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
 123   for (uint32_t i = 0; i < size; ++i)
 124   {
 125     node->at<loco::DataType::S32>(i) =
 126         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 127   }
 128 }
 129
 130 bool has_min_max(const CircleNode *node)
 131 {
 132   return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
 133 }
 134
 135 bool is_quantized(const CircleNode *node)
 136 {
 137   return node->dtype() == loco::DataType::U8 || // activation, weight
 138          node->dtype() == loco::DataType::S32;  // bias
 139 }
 140
 141 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor)
 142 {
 143   assert(node->dtype() == loco::DataType::FLOAT32);
 144
 145   const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
 146   const int32_t kMinScale = -kMaxScale;
 147
 148   uint32_t size = node->size<loco::DataType::FLOAT32>();
 149   std::vector<int32_t> quantized_values(size);
 150
 151   loco::TensorShape dimension;
 152   dimension.rank(4);
 153   uint32_t indices[4] = {
 154       0,
 155   };
 156   int channel_dim_index{0};
 157
 158   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 159   {
 160     assert(false);
 161     return;
 162   }
 163
 164   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 165   {
 166     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 167     {
 168       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 169       {
 170         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 171         {
 172           int channel_idx = indices[channel_dim_index];
 173           const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
 174           auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 175           quantized_values[cal_offset(dimension, indices)] =
 176               static_cast<int32_t>(std::round(data * scaling_factor_inv));
 177         }
 178       }
 179     }
 180   }
 181
 182   node->dtype(loco::DataType::S16);      // change the type of tensor
 183   node->size<loco::DataType::S16>(size); // resize tensor
 184   for (uint32_t i = 0; i < size; ++i)
 185   {
 186     node->at<loco::DataType::S16>(i) =
 187         std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 188   }
 189 }
 190
 191 void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
 192                              std::vector<float> &scaling_factor)
 193 {
 194   assert(node->dtype() == loco::DataType::FLOAT32);
 195
 196   const int32_t kMinScale = 0;
 197   const int32_t kMaxScale = 255;
 198
 199   uint32_t size = node->size<loco::DataType::FLOAT32>();
 200   std::vector<int32_t> quantized_values(size);
 201
 202   loco::TensorShape dimension;
 203   dimension.rank(4);
 204   uint32_t indices[4] = {
 205       0,
 206   };
 207   int channel_dim_index{0};
 208
 209   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 210   {
 211     assert(false);
 212     return;
 213   }
 214
 215   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 216   {
 217     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 218     {
 219       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 220       {
 221         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 222         {
 223           int channel_idx = indices[channel_dim_index];
 224           const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
 225           auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 226           quantized_values[cal_offset(dimension, indices)] =
 227               static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
 228         }
 229       }
 230     }
 231   }
 232
 233   node->dtype(loco::DataType::U8);      // change the type of tensor
 234   node->size<loco::DataType::U8>(size); // resize tensor
 235   for (uint32_t i = 0; i < size; ++i)
 236   {
 237     node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 238   }
 239 }
 240
 241 void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
 242 {
 243   const int32_t kMinScale = 0;
 244   const int32_t kMaxScale = 255;
 245
 246   uint32_t size = node->size<loco::DataType::FLOAT32>();
 247
 248   const float scaling_factor_inv = 1.0 / scaling_factor;
 249   std::vector<int32_t> quantized_values(size);
 250   for (uint32_t i = 0; i < size; ++i)
 251   {
 252     auto data = node->at<loco::DataType::FLOAT32>(i);
 253     quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
 254   }
 255
 256   node->dtype(loco::DataType::U8);      // change the type of tensor
 257   node->size<loco::DataType::U8>(size); // resize tensor
 258   for (uint32_t i = 0; i < size; ++i)
 259   {
 260     node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 261   }
 262 }
 263
 264 // Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
 265 bool is_weights(CircleNode *node)
 266 {
 267   auto circle_const = dynamic_cast<CircleConst *>(node);
 268   if (circle_const == nullptr)
 269     return false;
 270
 271   auto succs = loco::succs(node);
 272   if (succs.size() != 1) // assume weights is used by only one node
 273     return false;
 274
 275   for (auto out : succs)
 276   {
 277     auto conv = dynamic_cast<CircleConv2D *>(out);
 278     if (conv != nullptr && conv->filter() == circle_const)
 279       return true;
 280
 281     auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
 282     if (dw_conv != nullptr && dw_conv->filter() == circle_const)
 283       return true;
 284
 285     auto fc = dynamic_cast<CircleFullyConnected *>(out);
 286     if (fc != nullptr && fc->weights() == circle_const)
 287       return true;
 288   }
 289   return false;
 290 }
 291
 292 /**
 293  * @brief QuantizeActivation quantizes tensors for activations
 294  * @details Quantize using recorded min/max values
 295  */
 296 struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
 297 {
 298   QuantizeActivation(loco::DataType input, loco::DataType output)
 299       : input_type(input), output_type(output)
 300   {
 301   }
 302
 303   loco::DataType input_type;
 304   loco::DataType output_type;
 305
 306   // Quantize input tensors of each node
 307   bool visit(luci::CircleNode *node)
 308   {
 309     LOGGER(l);
 310     INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
 311     auto arity = node->arity();
 312     for (uint32_t i = 0; i < arity; i++)
 313     {
 314       auto input_node = node->arg(i);
 315       auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
 316
 317       // Check if this is already quantized
 318       if (is_quantized(circle_node))
 319         continue;
 320
 321       // Check if this is bias (bias is quantized later)
 322       auto iw = get_input_weight_of_bias(circle_node);
 323       if (iw.first != nullptr && iw.second != nullptr)
 324         continue;
 325
 326       // Check if this is activation
 327       // We assume min/max are recorded only for activations
 328       if (has_min_max(circle_node) && !is_weights(circle_node))
 329       {
 330         // Quantize using recorded min/max
 331         auto quantparam = circle_node->quantparam();
 332         assert(quantparam->min.size() == 1); // only support layer-wise quant
 333         assert(quantparam->max.size() == 1); // only support layer-wise quant
 334         auto min = quantparam->min[0];
 335         auto max = quantparam->max[0];
 336
 337         float scaling_factor{0};
 338         int64_t zp{0};
 339         float nudged_min{0};
 340         float nudged_max{0};
 341
 342         if (output_type == loco::DataType::U8)
 343         {
 344           compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
 345           circle_node->dtype(loco::DataType::U8);
 346         }
 347         else
 348         {
 349           compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
 350           circle_node->dtype(loco::DataType::S16);
 351         }
 352
 353         circle_node->quantparam()->max[0] = nudged_max;
 354         circle_node->quantparam()->min[0] = nudged_min;
 355         circle_node->quantparam()->scale.push_back(scaling_factor);
 356         circle_node->quantparam()->zerop.push_back(zp);
 357       }
 358     }
 359     return false;
 360   }
 361 };
 362
 363 struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
 364 {
 365   QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
 366       : input_type(input), output_type(output), granularity(gr)
 367   {
 368   }
 369
 370   loco::DataType input_type;
 371   loco::DataType output_type;
 372   QuantizationGranularity granularity;
 373
 374   // Quantize bias node
 375   bool visit(luci::CircleNode *node)
 376   {
 377     // Check if this is already quantized
 378     if (is_quantized(node))
 379       return false;
 380
 381     // Check if this is bias
 382     auto iw = get_input_weight_of_bias(node);
 383     if (iw.first == nullptr || iw.second == nullptr)
 384       return false;
 385
 386     auto input = loco::must_cast<luci::CircleNode *>(iw.first);
 387     auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
 388
 389     if (granularity == QuantizationGranularity::ChannelWise)
 390     {
 391       assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
 392       auto input_scale = input->quantparam()->scale[0];
 393
 394       assert(weight->quantparam() != nullptr); // weight scale's channel-wise
 395       auto weight_scale = weight->quantparam()->scale;
 396
 397       auto circle_const = loco::must_cast<luci::CircleConst *>(node);
 398
 399       uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
 400       assert(size == weight_scale.size());
 401       std::vector<float> scaling_factor(size);
 402       std::vector<int64_t> zp(size);
 403
 404       quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
 405
 406       auto quantparam = std::make_unique<CircleQuantParam>();
 407       quantparam->scale = scaling_factor;
 408       quantparam->zerop = zp;
 409       assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
 410       circle_const->quantparam(std::move(quantparam));
 411     }
 412     else
 413     {
 414       assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
 415       auto input_scale = input->quantparam()->scale[0];
 416
 417       assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
 418       auto weight_scale = weight->quantparam()->scale[0];
 419
 420       auto circle_const = loco::must_cast<luci::CircleConst *>(node);
 421       float scaling_factor{0};
 422       int64_t zp{0};
 423       asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
 424       auto quantparam = std::make_unique<CircleQuantParam>();
 425       quantparam->scale.push_back(scaling_factor);
 426       quantparam->zerop.push_back(zp);
 427       assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
 428       circle_const->quantparam(std::move(quantparam));
 429     }
 430     return false;
 431   }
 432 };
 433
 434 /**
 435  * @brief QuantizeWeights quantizes tensors for weights
 436  * @details Find min/max values on the fly and then quantize
 437  */
 438 struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
 439 {
 440   QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
 441       : input_type(input), output_type(output), granularity(gr)
 442   {
 443   }
 444
 445   loco::DataType input_type;
 446   loco::DataType output_type;
 447   QuantizationGranularity granularity;
 448
 449   // Quantize input tensors of each node
 450   bool visit(luci::CircleNode *node)
 451   {
 452     LOGGER(l);
 453     INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
 454     auto arity = node->arity();
 455     for (uint32_t i = 0; i < arity; i++)
 456     {
 457       auto input_node = node->arg(i);
 458       auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
 459
 460       // Check if this is already quantized
 461       if (is_quantized(circle_node))
 462         continue;
 463
 464       if (is_weights(circle_node))
 465       {
 466         auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
 467
 468         // Find min/max per channel-wise
 469         if (granularity == QuantizationGranularity::ChannelWise)
 470         {
 471           auto quantparam = circle_node->quantparam();
 472           assert(quantparam != nullptr);
 473           auto min = quantparam->min;
 474           auto scaling_factor = quantparam->scale;
 475
 476           if (output_type == loco::DataType::U8)
 477           {
 478             asym_wquant_per_channel(circle_const, min, scaling_factor);
 479           }
 480           else
 481           {
 482             sym_wquant_per_channel(circle_const, scaling_factor);
 483           }
 484         }
 485         // Find min/max per layer-wise
 486         else
 487         {
 488           // Quantize using recorded quantparam
 489           auto quantparam = circle_node->quantparam();
 490           assert(quantparam != nullptr);
 491           assert(quantparam->min.size() == 1);   // only support layer-wise quant
 492           assert(quantparam->scale.size() == 1); // only support layer-wise quant
 493           auto min = quantparam->min[0];
 494           auto scaling_factor = quantparam->scale[0];
 495           asym_wquant_per_layer(circle_const, min, scaling_factor);
 496         }
 497       }
 498     }
 499     return false;
 500   }
 501 };
 502
 503 } // namespace
 504
 505 bool QuantizeWithMinMaxPass::run(loco::Graph *g)
 506 {
 507   LOGGER(l);
 508   INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
 509
 510   // Quantize activation
 511   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 512   {
 513     QuantizeActivation qa(_input_dtype, _output_dtype);
 514     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 515     circle_node->accept(&qa);
 516   }
 517
 518   // Quantize weights
 519   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 520   {
 521     QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
 522     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 523     circle_node->accept(&qw);
 524   }
 525
 526   // Quantize bias
 527   for (auto node : loco::active_nodes(loco::output_nodes(g)))
 528   {
 529     QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
 530     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
 531     circle_node->accept(&qb);
 532   }
 533
 534   // Update output dtype
 535   auto graph_outputs = g->outputs();
 536   for (auto node : loco::output_nodes(g))
 537   {
 538     auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
 539     if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
 540     {
 541       circle_node->dtype(_output_dtype);
 542       auto graph_output = graph_outputs->at(circle_node->index());
 543       graph_output->dtype(_output_dtype);
 544     }
 545   }
 546
 547   INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
 548   return false; // one time run
 549 }
 550
 551 } // namespace luci