compiler/circle-mpqsolver/src/bisection/ErrorApproximator.cpp

   1 /*
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "ErrorApproximator.h"
  18
  19 #include <cmath>
  20 #include <limits>
  21 #include <vector>
  22 #include <functional>
  23 #include <luci/IR/CircleNode.h>
  24
  25 namespace
  26 {
  27
  28 using namespace luci;
  29 using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
  30
  31 inline bool has_min_max(const CircleNode *node)
  32 {
  33   return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
  34 }
  35
  36 inline uint32_t cal_offset(const loco::TensorShape &dimension, uint32_t *indices)
  37 {
  38   return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
  39            dimension.dim(3).value() +
  40          indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
  41          indices[2] * dimension.dim(3).value() + indices[3];
  42 }
  43
  44 uint32_t get_channel_dim_index(const CircleNode *node)
  45 {
  46   uint32_t index = 0;
  47   auto opcode = node->opcode();
  48   switch (opcode)
  49   {
  50     case CircleOpcode::CONV_2D:
  51     case CircleOpcode::TRANSPOSE_CONV:
  52     case CircleOpcode::FULLY_CONNECTED:
  53       index = 0;
  54       break;
  55     case CircleOpcode::DEPTHWISE_CONV_2D:
  56       index = 3;
  57       break;
  58     default:
  59       throw std::runtime_error("Failed to find channel index in " + node->name());
  60   }
  61
  62   return index;
  63 }
  64
  65 bool set_weight_dim(const CircleNode *node, const CircleConst *weights,
  66                     loco::TensorShape &dimension)
  67 {
  68   auto opcode = node->opcode();
  69   switch (opcode)
  70   {
  71     case CircleOpcode::CONV_2D:
  72     case CircleOpcode::TRANSPOSE_CONV:
  73     case CircleOpcode::DEPTHWISE_CONV_2D:
  74       assert(node->rank() == 4);
  75       dimension.rank(node->rank());
  76       dimension.dim(0).set(weights->dim(0).value());
  77       dimension.dim(1).set(weights->dim(1).value());
  78       dimension.dim(2).set(weights->dim(2).value());
  79       dimension.dim(3).set(weights->dim(3).value());
  80       break;
  81     case CircleOpcode::FULLY_CONNECTED:
  82       assert(node->rank() == 2);
  83       dimension.rank(4);
  84       dimension.dim(0).set(weights->dim(0).value());
  85       dimension.dim(1).set(1); // Set FC layer like CONV
  86       dimension.dim(2).set(1);
  87       dimension.dim(3).set(weights->dim(1).value());
  88       break;
  89     default:
  90       return false;
  91   }
  92
  93   return true;
  94 }
  95
  96 loco::Node *get_weight(const CircleNode *node)
  97 {
  98   loco::Node *weight = nullptr;
  99   auto opcode = node->opcode();
 100   switch (opcode)
 101   {
 102     case CircleOpcode::CONV_2D:
 103     {
 104       auto conv = loco::must_cast<const CircleConv2D *>(node);
 105       weight = conv->filter();
 106     }
 107     break;
 108     case CircleOpcode::DEPTHWISE_CONV_2D:
 109     {
 110       auto dconv = loco::must_cast<const CircleDepthwiseConv2D *>(node);
 111       weight = dconv->filter();
 112     }
 113     break;
 114     case CircleOpcode::TRANSPOSE_CONV:
 115     {
 116       auto tconv = loco::must_cast<const CircleTransposeConv *>(node);
 117       weight = tconv->filter();
 118     }
 119     break;
 120     case CircleOpcode::FULLY_CONNECTED:
 121     {
 122       auto fc = loco::must_cast<const CircleFullyConnected *>(node);
 123       weight = fc->weights();
 124     }
 125     break;
 126     default:
 127       break;
 128   }
 129
 130   return weight;
 131 }
 132
 133 inline CircleConst *get_constant_weight(const CircleNode *node)
 134 {
 135   CircleConst *weight = dynamic_cast<CircleConst *>(get_weight(node));
 136   if (weight == nullptr)
 137   {
 138     throw std::runtime_error("Unsupported non-constant weights in convolution node " +
 139                              node->name());
 140   }
 141
 142   return weight;
 143 }
 144
 145 void iterate_per_channel(const CircleNode *node, IterFunc func)
 146 {
 147   CircleConst *weight = get_constant_weight(node);
 148
 149   loco::TensorShape dimension;
 150   set_weight_dim(node, weight, dimension);
 151   uint32_t indices[4] = {
 152     0,
 153   };
 154
 155   auto channel_dim_index = get_channel_dim_index(node);
 156
 157   for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
 158   {
 159     for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
 160     {
 161       for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
 162       {
 163         for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
 164         {
 165           func(indices, dimension, channel_dim_index);
 166         }
 167       }
 168     }
 169   }
 170 }
 171
 172 void cal_minmax_per_channel(const CircleNode *node, std::vector<float> &min,
 173                             std::vector<float> &max)
 174 {
 175   CircleConst *weight = get_constant_weight(node);
 176
 177   loco::TensorShape dimension;
 178   set_weight_dim(node, weight, dimension);
 179
 180   auto channel_dim_index = get_channel_dim_index(node);
 181   auto size = dimension.dim(channel_dim_index).value();
 182
 183   std::vector<bool> has_min_max_value(size, false);
 184   min.resize(size);
 185   max.resize(size);
 186
 187   auto cal_minmax = [&](uint32_t *indices, loco::TensorShape &dimension,
 188                         uint32_t channel_dim_index) {
 189     uint32_t channel_idx = indices[channel_dim_index];
 190     auto data = weight->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
 191     if (has_min_max_value[channel_idx])
 192     {
 193       min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
 194       max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
 195     }
 196     else
 197     {
 198       min[channel_idx] = data;
 199       max[channel_idx] = data;
 200       has_min_max_value[channel_idx] = true;
 201     }
 202   };
 203
 204   iterate_per_channel(node, cal_minmax);
 205 }
 206
 207 bool get_shape(const CircleNode *circle_node, std::vector<uint32_t> &shape)
 208 {
 209   if (circle_node->shape_status() == ShapeStatus::VALID)
 210   {
 211     auto rank = circle_node->rank();
 212     if (rank != 4)
 213       return false;
 214
 215     shape.resize(rank);
 216     for (uint32_t i = 0; i < rank; i++)
 217     {
 218       shape[i] = circle_node->dim(i).value();
 219     }
 220     return true;
 221   }
 222
 223   return false;
 224 }
 225
 226 /**
 227  * @brief get_additions_per_channel computes W * H * CIN * KW * KH.
 228  *
 229  * W, H - width/height of OFM; KW, KH - convolution kernel width/height;
 230  * CIN - number of channels in IFM (for depthwise its unity)
 231  * See
 232  * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638
 233  * for derivation.
 234  */
 235 uint32_t get_additions_per_channel(const CircleNode *node)
 236 {
 237   uint32_t adds_per_channel = 1;
 238   std::vector<uint32_t> ofm_shape;
 239   if (!get_shape(node, ofm_shape)) // [BATCH, W, H, channels_out]
 240   {
 241     throw std::runtime_error("Failed to find correct shape " + node->name());
 242   }
 243
 244   adds_per_channel *= ofm_shape[1] * ofm_shape[2]; // adds_per_channel *= W * H
 245
 246   auto weights = loco::must_cast<CircleNode *>(get_weight(node));
 247   {
 248     std::vector<uint32_t> w_shape;
 249     if (get_shape(weights, w_shape)) // [channels_out, k_x, k_y, channels_in]
 250     {
 251       adds_per_channel *= (w_shape[1] * w_shape[2]); // adds_per_channel *= k_x * k_y
 252     }
 253     if (node->opcode() != CircleOpcode::DEPTHWISE_CONV_2D)
 254     {
 255       // for not depthwise convolutions we need to scale it by CIN
 256       adds_per_channel *= w_shape[3]; // adds_per_channel *= c_in
 257     }
 258   }
 259
 260   return adds_per_channel;
 261 }
 262
 263 void get_min_max_ifm_values(const CircleNode *node, float &ci_min, float &ci_max)
 264 {
 265   auto preds = loco::preds(node);
 266   for (const auto &pred : preds)
 267   {
 268     auto parent_node = loco::must_cast<const luci::CircleNode *>(pred);
 269     if (has_min_max(parent_node))
 270     {
 271       auto quantparam = parent_node->quantparam();
 272       if (quantparam->min.size() > 0)
 273       {
 274         ci_min = quantparam->min[0];
 275         ci_max = quantparam->max[0];
 276       }
 277     }
 278   }
 279 }
 280
 281 /**
 282  * @brief Return upper bound of quantization error for CONV, DCONV, TCONV.
 283  *
 284  * See
 285  * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638 for details.
 286  */
 287 float approximate_conv(const CircleNode *node)
 288 {
 289   float volume_W_A_err = 0.f;
 290   {
 291     // activation min-max values
 292     float ci_min = 0.f;
 293     float ci_max = 0.f;
 294     get_min_max_ifm_values(node, ci_min, ci_max);
 295
 296     // channel-wise min, max
 297     std::vector<float> min_values;
 298     std::vector<float> max_values;
 299     cal_minmax_per_channel(node, min_values, max_values);
 300     assert(not min_values.empty());
 301     assert(not max_values.empty());
 302
 303     // ranges  = (max_values - min_values)
 304     std::vector<float> ranges;
 305     std::transform(max_values.begin(), max_values.end(), min_values.begin(),
 306                    std::back_inserter(ranges), std::minus<float>());
 307
 308     // maximal weight value across all channels
 309     float w_max = 0;
 310     {
 311       assert(max_values.size() == min_values.size());
 312       for (size_t i = 0; i < max_values.size(); ++i)
 313       {
 314         w_max = std::max(w_max, std::abs(max_values[i]));
 315         w_max = std::max(w_max, std::abs(min_values[i]));
 316       }
 317     }
 318
 319     // total weight quantization error across all channels
 320     // so maximal error of quantization is ~ (max_value - min_value) / 255
 321     // omitting 255 term we get that maximal error of quantization is just its range
 322     float sum_err = 0.f;
 323     for (auto cur_err : ranges)
 324     {
 325       sum_err += cur_err;
 326     }
 327
 328     uint32_t adds_per_channel = get_additions_per_channel(node);
 329     uint32_t num_of_channels = ranges.size();
 330
 331     // maximal error introduced by weights quantization (for all channels)
 332     volume_W_A_err = sum_err * std::max(::fabs(ci_max), ::fabs(ci_min));
 333     // plus total error introduced by activation quantization (for all channels)
 334     volume_W_A_err += w_max * num_of_channels * ::fabs(ci_max - ci_min);
 335     // scale by volume of adds per channel
 336     volume_W_A_err *= adds_per_channel;
 337     // scale to get more readable output values
 338     volume_W_A_err /= 1.e+6f;
 339   }
 340
 341   return volume_W_A_err;
 342 }
 343
 344 } // namespace
 345
 346 namespace mpqsolver
 347 {
 348 namespace bisection
 349 {
 350
 351 /**
 352  * How Approximate works?
 353  *
 354  * Currently it works just for convolution layers, but may be generalized for other types as well.
 355  * See discussion at https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598
 356  * Convolution can be expressed as a matrix multiplication.
 357  * While quantizing we introduce quantization error into convolution operand (activations) as well
 358  * as into convolution weights. A_q * W_q = (A + q_err(A)) * (W + q_err(W)) = A * W + A * q_err(W) +
 359  * W * q_err(A) + q_err(A) * q_err(W), assuming q_err(A) * q_err(W) are negligible as quadratic
 360  * terms, we get A_q * W_q ~ A * W + A * q_err(W) +  W * q_err(A) , q_err - quantization error,
 361  * W - weight matrix, A - activations from previous layer (IFM), so quantization error of matrix
 362  * multiplication can be approximated as A * q_err(W) + W * q_err(A). Estimating its upper bound
 363  * we get A * q_err(W) + W * q_err(A) <=
 364  * number_of_additions * (A_max * (W_max - W_min) / 255 + W_max * (A_max - A_min) / 255)
 365  * The following code tries to get total error for quantizing convolution node into Q8.
 366  * It's just an heuristic (Metric sensitivity depends highly on derivatives as well).
 367  */
 368 float approximate(const CircleNode *node)
 369 {
 370   auto opcode = node->opcode();
 371   float qerror = 0.f;
 372   switch (opcode)
 373   {
 374     case CircleOpcode::DEPTHWISE_CONV_2D:
 375     case CircleOpcode::CONV_2D:
 376     case CircleOpcode::TRANSPOSE_CONV:
 377       qerror = approximate_conv(node);
 378       break;
 379     default: // TODO (FULLY_CONNECTED e.g.)
 380       qerror = 0.f;
 381   }
 382
 383   return qerror;
 384 }
 385
 386 } // namespace bisection
 387 } // namespace mpqsolver