compiler/luci/pass/src/QuantizeWeightsOnly.cpp

   1 /*
   2  * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
#include "QuantizeWeightsOnly.h"
#include "QuantizationUtils.h"

#include <luci/Service/Nodes/CircleConst.h>
#include <luci/Log.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
  27
  28 using namespace luci;
  29
  30 namespace
  31 {
  32
  33 using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
  34
  35 void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
  36 {
  37   loco::TensorShape dimension;
  38   dimension.rank(4);
  39   uint32_t indices[4] = {
  40     0,
  41   };
  42
  43   if (!get_channel_dim_index(node, dimension, channel_dim_index))
  44   {
  45     assert(false);
  46     return;
  47   }
  48
  49   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
  50   {
  51     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
  52     {
  53       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
  54       {
  55         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
  56         {
  57           func(indices, dimension, channel_dim_index);
  58         }
  59       }
  60     }
  61   }
  62 }
  63
  64 // TODO Reduce duplicate code with QuantizeDequantizeWeights
  65 template <loco::DataType out_type>
  66 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
  67                             std::vector<float> &scaling_factor, std::vector<float> &nudged_min,
  68                             std::vector<float> &nudged_max, int32_t &channel_dim_index)
  69 {
  70   assert(node->dtype() == loco::DataType::FLOAT32);
  71   assert(out_type == loco::DataType::S8 || out_type == loco::DataType::S16);
  72   const int32_t kMaxScale = (out_type == loco::DataType::S8) ? std::numeric_limits<int8_t>::max()
  73                                                              : std::numeric_limits<int16_t>::max();
  74   const int32_t kMinScale = -kMaxScale;
  75
  76   uint32_t size = node->size<loco::DataType::FLOAT32>();
  77   std::vector<int32_t> quantized_values(size);
  78
  79   for (size_t i = 0; i < min.size(); ++i)
  80   {
  81     compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i], out_type);
  82   }
  83
  84   auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
  85     int channel_idx = indices[channel_dim_index];
  86     const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
  87     auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
  88     data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
  89     data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
  90     quantized_values[cal_offset(dimension, indices)] =
  91       static_cast<int32_t>(std::round(data * scaling_factor_inv));
  92   };
  93
  94   iterate_per_channel(node, channel_dim_index, quantize);
  95
  96   node->dtype(out_type);      // change the type of tensor
  97   node->size<out_type>(size); // resize tensor
  98   for (uint32_t i = 0; i < size; ++i)
  99   {
 100     node->at<out_type>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
 101   }
 102 }
 103
 104 void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
 105                             int32_t &channel_dim_index)
 106 {
 107   loco::TensorShape dimension;
 108   dimension.rank(4);
 109
 110   if (!get_channel_dim_index(node, dimension, channel_dim_index))
 111   {
 112     throw std::runtime_error("Failed to find channel index in " + node->name());
 113   }
 114   auto size = dimension.dim(channel_dim_index).value();
 115
 116   std::vector<bool> has_min_max_value(size, false);
 117   min.resize(size);
 118   max.resize(size);
 119
 120   auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
 121     int channel_idx = indices[channel_dim_index];
 122     auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 123     if (has_min_max_value[channel_idx])
 124     {
 125       min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
 126       max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
 127     }
 128     else
 129     {
 130       min[channel_idx] = data;
 131       max[channel_idx] = data;
 132       has_min_max_value[channel_idx] = true;
 133     }
 134   };
 135
 136   iterate_per_channel(node, channel_dim_index, cal_minmax);
 137 }
 138
 139 } // namespace
 140
 141 namespace luci
 142 {
 143
 144 void QuantizeWeightsOnly::quantize_weights(luci::CircleConst *weights)
 145 {
 146   // Find min/max per channel-wise
 147   if (granularity == QuantizationGranularity::ChannelWise)
 148   {
 149     auto quantparam = weights->quantparam();
 150     if (quantparam == nullptr)
 151     {
 152       // Find min/max on the fly
 153       // NOTE This is for the case when QuantizeDequantizeWeights is skipped
 154       // TODO Reduce duplicate codes
 155       std::vector<float> min;
 156       std::vector<float> max;
 157       int32_t channel_dim_index = 0;
 158
 159       cal_minmax_per_channel(weights, min, max, channel_dim_index);
 160
 161       std::vector<float> nudged_min(min.size());
 162       std::vector<float> nudged_max(min.size());
 163       std::vector<float> scaling_factor(min.size());
 164       std::vector<int64_t> zp(min.size());
 165
 166       if (output_type == loco::DataType::S8)
 167       {
 168         sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor, nudged_min,
 169                                                    nudged_max, channel_dim_index);
 170       }
 171       else if (output_type == loco::DataType::S16)
 172       {
 173         sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor, nudged_min,
 174                                                     nudged_max, channel_dim_index);
 175       }
 176       else
 177       {
 178         throw std::runtime_error("Weights-only quantization supports s8 and s16");
 179       }
 180
 181       auto quantparam = std::make_unique<CircleQuantParam>();
 182       quantparam->scale = scaling_factor;
 183       quantparam->zerop = zp;
 184       quantparam->quantized_dimension = channel_dim_index;
 185       weights->quantparam(std::move(quantparam));
 186
 187       return;
 188     }
 189   }
 190   else
 191     throw std::runtime_error("Weights-only quantization does not support layer-wise");
 192 }
 193
 194 void QuantizeWeightsOnly::visit(luci::CircleConv2D *node)
 195 {
 196   LOGGER(l);
 197   INFO(l) << "QuantizeWeightsOnly visits node: " << node->name() << std::endl;
 198
 199   auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
 200   if (!is_quantized(weights))
 201   {
 202     auto new_weights = luci::clone(weights);
 203     node->filter(new_weights);
 204     quantize_weights(new_weights);
 205   }
 206 }
 207
 208 void QuantizeWeightsOnly::visit(luci::CircleDepthwiseConv2D *node)
 209 {
 210   LOGGER(l);
 211   INFO(l) << "QuantizeWeightsOnly visits node: " << node->name() << std::endl;
 212
 213   auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
 214   if (!is_quantized(weights))
 215   {
 216     auto new_weights = luci::clone(weights);
 217     node->filter(new_weights);
 218     quantize_weights(new_weights);
 219   }
 220 }
 221
 222 void QuantizeWeightsOnly::visit(luci::CircleNode *) {}
 223
 224 } // namespace luci