compiler/circle-mpqsolver/src/bisection/BisectionSolver.cpp

   1 /*
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *    http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "BisectionSolver.h"
  18 #include "DepthParameterizer.h"
  19 #include "VISQErrorApproximator.h"
  20
  21 #include <core/ErrorMetric.h>
  22 #include <core/SolverOutput.h>
  23
  24 #include <luci/ImporterEx.h>
  25
  26 #include <cmath>
  27 #include <iostream>
  28
  29 using namespace mpqsolver::bisection;
  30
  31 namespace
  32 {
  33
  34 /**
  35  * @brief Compare errors of two disjoint subsets of a model sliced by cut_depth
  36  * @return True if the front part (< cut_depth) has larger errors than the rear part (>= cut_depth)
  37  */
  38 bool front_has_higher_error(const NodeDepthType &nodes_depth, const std::string &visq_path,
  39                             float cut_depth)
  40 {
  41   SolverOutput::get() << "\n>> Running bisection(auto) algorithm\n";
  42
  43   VISQErrorApproximator approximator;
  44   approximator.init(visq_path);
  45
  46   float error_at_input = 0;
  47   float error_at_output = 0;
  48   for (auto &iter : nodes_depth)
  49   {
  50     float cur_error = approximator.approximate(iter.first->name());
  51     if (iter.second < cut_depth)
  52     {
  53       error_at_input += cur_error;
  54     }
  55     else
  56     {
  57       error_at_output += cur_error;
  58     }
  59   }
  60
  61   SolverOutput::get() << "Qerror of front half: " << error_at_input << "\n";
  62   SolverOutput::get() << "Qerror of rear half: " << error_at_output << "\n";
  63   if (error_at_input > error_at_output)
  64   {
  65     SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
  66   }
  67   else
  68   {
  69     SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
  70   }
  71
  72   return error_at_input > error_at_output;
  73 }
  74
  75 std::unique_ptr<luci::Module> read_module(const std::string &path)
  76 {
  77   luci::ImporterEx importerex;
  78   auto module = importerex.importVerifyModule(path);
  79   if (module.get() == nullptr)
  80   {
  81     std::cerr << "ERROR: Failed to load " << path << std::endl;
  82     return nullptr;
  83   }
  84
  85   return module;
  86 }
  87
  88 } // namespace
  89
  90 BisectionSolver::BisectionSolver(const std::string &input_data_path, float qerror_ratio,
  91                                  const std::string &input_quantization,
  92                                  const std::string &output_quantization)
  93   : MPQSolver(input_data_path, qerror_ratio, input_quantization, output_quantization)
  94 {
  95   _quantizer = std::make_unique<core::Quantizer>(_input_quantization, _output_quantization);
  96 }
  97
  98 float BisectionSolver::evaluate(const core::DatasetEvaluator &evaluator,
  99                                 const std::string &flt_path, const std::string &def_quant,
 100                                 core::LayerParams &layers)
 101 {
 102   auto model = read_module(flt_path);
 103   // get fake quantized model for evaluation
 104   if (!_quantizer->fake_quantize(model.get(), def_quant, layers))
 105   {
 106     throw std::runtime_error("Failed to produce fake-quantized model.");
 107   }
 108
 109   return evaluator.evaluate(model.get());
 110 }
 111
 112 void BisectionSolver::algorithm(Algorithm algorithm) { _algorithm = algorithm; }
 113
 114 void BisectionSolver::setVisqPath(const std::string &visq_path) { _visq_data_path = visq_path; }
 115
 116 std::unique_ptr<luci::Module> BisectionSolver::run(const std::string &module_path)
 117 {
 118   auto module = read_module(module_path);
 119
 120   float min_depth = 0.f;
 121   float max_depth = 0.f;
 122   NodeDepthType nodes_depth;
 123   if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
 124       ParameterizerResult::SUCCESS)
 125   {
 126     std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
 127     return nullptr;
 128   }
 129
 130   SolverOutput::get() << "\n>> Computing baseline qerrors\n";
 131
 132   std::unique_ptr<core::MAEMetric> metric = std::make_unique<core::MAEMetric>();
 133   core::DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
 134
 135   core::LayerParams layer_params;
 136   float int16_qerror =
 137     evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
 138   SolverOutput::get() << "Full int16 model qerror: " << int16_qerror << "\n";
 139
 140   float uint8_qerror =
 141     evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
 142   SolverOutput::get() << "Full uint8 model qerror: " << uint8_qerror << "\n";
 143   _quantizer->set_hook(_hooks.get());
 144   if (_hooks)
 145   {
 146     _hooks->on_begin_solver(module_path, uint8_qerror, int16_qerror);
 147   }
 148
 149   if (int16_qerror > uint8_qerror)
 150   {
 151     throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
 152   }
 153
 154   _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
 155   SolverOutput::get() << "Target qerror: " << _qerror << "\n";
 156
 157   if (uint8_qerror <= _qerror)
 158   {
 159     // no need for bisectioning just return Q8 model
 160     if (!_quantizer->quantize(module.get(), "uint8", layer_params))
 161     {
 162       std::cerr << "ERROR: Failed to quantize model" << std::endl;
 163       return nullptr;
 164     }
 165   }
 166
 167   int last_depth = -1;
 168   float best_depth = -1;
 169   float best_accuracy = -1;
 170   core::LayerParams best_params;
 171   if (module->size() != 1)
 172   {
 173     throw std::runtime_error("Unsupported module");
 174   }
 175   auto graph = module->graph(0);
 176   auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
 177   // input and output nodes are not valid for quantization, so let's remove them
 178   for (auto node : loco::input_nodes(graph))
 179   {
 180     active_nodes.erase(node);
 181   }
 182   for (auto node : loco::output_nodes(graph))
 183   {
 184     active_nodes.erase(node);
 185   }
 186
 187   // let's decide whether nodes at input are more suspectible to be quantized into Q16, than at
 188   // output
 189   bool int16_front = true;
 190   switch (_algorithm)
 191   {
 192     case Algorithm::Auto:
 193       int16_front =
 194         front_has_higher_error(nodes_depth, _visq_data_path, 0.5f * (max_depth + min_depth));
 195       break;
 196     case Algorithm::ForceQ16Front:
 197       SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
 198       int16_front = true;
 199       break;
 200     case Algorithm::ForceQ16Back:
 201       SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
 202       int16_front = false;
 203       break;
 204   }
 205
 206   SolverOutput::get() << "\n";
 207
 208   while (true)
 209   {
 210     if (_hooks)
 211     {
 212       _hooks->on_begin_iteration();
 213     }
 214
 215     int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
 216
 217     if (last_depth == cut_depth)
 218     {
 219       break;
 220     }
 221
 222     SolverOutput::get() << "Looking for the optimal configuration in [" << min_depth << " , "
 223                         << max_depth << "] depth segment\n";
 224
 225     last_depth = cut_depth;
 226
 227     core::LayerParams layer_params;
 228     for (auto &node : active_nodes)
 229     {
 230       auto cur_node = loco::must_cast<luci::CircleNode *>(node);
 231       auto iter = nodes_depth.find(cur_node);
 232       if (iter == nodes_depth.end())
 233       {
 234         continue; // to filter out nodes like weights
 235       }
 236
 237       float depth = iter->second;
 238
 239       if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
 240       {
 241         auto layer_param = std::make_shared<core::LayerParam>();
 242         {
 243           layer_param->name = cur_node->name();
 244           layer_param->dtype = "int16";
 245           layer_param->granularity = "channel";
 246         }
 247
 248         layer_params.emplace_back(layer_param);
 249       }
 250     }
 251
 252     float cur_accuracy = evaluate(evaluator, module_path, "uint8", layer_params);
 253
 254     if (_hooks)
 255     {
 256       _hooks->on_end_iteration(layer_params, "uint8", cur_accuracy);
 257     }
 258
 259     if (cur_accuracy < _qerror)
 260     {
 261       SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
 262                           << " < target qerror (" << _qerror << ")\n";
 263       int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
 264       best_params = layer_params;
 265       best_depth = cut_depth;
 266       best_accuracy = cur_accuracy;
 267     }
 268     else
 269     {
 270       SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
 271                           << (cur_accuracy > _qerror ? " > " : " == ") << "target qerror ("
 272                           << _qerror << ")\n";
 273       int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
 274     }
 275   }
 276
 277   if (_hooks)
 278   {
 279     _hooks->on_end_solver(best_params, "uint8", best_accuracy);
 280   }
 281
 282   SolverOutput::get() << "Found the best configuration at depth " << best_depth << "\n";
 283   if (!_quantizer->quantize(module.get(), "uint8", best_params))
 284   {
 285     std::cerr << "ERROR: Failed to quantize model" << std::endl;
 286     return nullptr;
 287   }
 288
 289   return module;
 290 }