2 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "BisectionSolver.h"
18 #include "DepthParameterizer.h"
19 #include "VISQErrorApproximator.h"
21 #include <core/ErrorMetric.h>
22 #include <core/SolverOutput.h>
24 #include <luci/ImporterEx.h>
29 using namespace mpqsolver::bisection;
35 * @brief Compare errors of two disjoint subsets of a model sliced by cut_depth
36 * @return True if the front part (< cut_depth) has larger errors than the rear part (>= cut_depth)
38 bool front_has_higher_error(const NodeDepthType &nodes_depth, const std::string &visq_path,
41 SolverOutput::get() << "\n>> Running bisection(auto) algorithm\n";
43 VISQErrorApproximator approximator;
44 approximator.init(visq_path);
46 float error_at_input = 0;
47 float error_at_output = 0;
48 for (auto &iter : nodes_depth)
50 float cur_error = approximator.approximate(iter.first->name());
51 if (iter.second < cut_depth)
53 error_at_input += cur_error;
57 error_at_output += cur_error;
61 SolverOutput::get() << "Qerror of front half: " << error_at_input << "\n";
62 SolverOutput::get() << "Qerror of rear half: " << error_at_output << "\n";
63 if (error_at_input > error_at_output)
65 SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
69 SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
72 return error_at_input > error_at_output;
75 std::unique_ptr<luci::Module> read_module(const std::string &path)
77 luci::ImporterEx importerex;
78 auto module = importerex.importVerifyModule(path);
79 if (module.get() == nullptr)
81 std::cerr << "ERROR: Failed to load " << path << std::endl;
90 BisectionSolver::BisectionSolver(const std::string &input_data_path, float qerror_ratio,
91 const std::string &input_quantization,
92 const std::string &output_quantization)
93 : MPQSolver(input_data_path, qerror_ratio, input_quantization, output_quantization)
95 _quantizer = std::make_unique<core::Quantizer>(_input_quantization, _output_quantization);
98 float BisectionSolver::evaluate(const core::DatasetEvaluator &evaluator,
99 const std::string &flt_path, const std::string &def_quant,
100 core::LayerParams &layers)
102 auto model = read_module(flt_path);
103 // get fake quantized model for evaluation
104 if (!_quantizer->fake_quantize(model.get(), def_quant, layers))
106 throw std::runtime_error("Failed to produce fake-quantized model.");
109 return evaluator.evaluate(model.get());
112 void BisectionSolver::algorithm(Algorithm algorithm) { _algorithm = algorithm; }
114 void BisectionSolver::setVisqPath(const std::string &visq_path) { _visq_data_path = visq_path; }
/**
 * @brief Look for a mixed int16/uint8 layer configuration by bisecting the model along node depth
 *
 * Steps visible here: load the module, compute node depths, measure the full-int16 and full-uint8
 * qerrors, derive the target qerror from _qerror_ratio, choose which side of the cut becomes int16,
 * evaluate cut depths while narrowing [min_depth, max_depth], and finally quantize the module with
 * the best parameters found.
 *
 * @param module_path path of the model; it is loaded here and loaded again by every evaluate() call
 * @throws std::runtime_error if the int16 qerror is larger than the uint8 qerror, if
 *         module->size() is not 1, or if evaluate() throws
 *
 * NOTE(review): several lines of this function (return statements, the loop and switch headers,
 * a few declarations) are not visible in this view, so return values are deliberately not
 * described here.
 */
116 std::unique_ptr<luci::Module> BisectionSolver::run(const std::string &module_path)
// NOTE(review): the result of read_module is used below without a null check, although
// read_module has a load-failure path; confirm compute_depth copes with a null module.
118 auto module = read_module(module_path);
// Bounds of the depth segment searched below; presumably filled in by compute_depth, which
// receives them together with the node -> depth container.
120 float min_depth = 0.f;
121 float max_depth = 0.f;
122 NodeDepthType nodes_depth;
123 if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
124 ParameterizerResult::SUCCESS)
126 std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
130 SolverOutput::get() << "\n>> Computing baseline qerrors\n";
// Mean-absolute-error metric named by its type (MAEMetric); the evaluator is built once and
// reused for every evaluate() call in this function.
132 std::unique_ptr<core::MAEMetric> metric = std::make_unique<core::MAEMetric>();
133 core::DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
// Baselines: layer_params is default-constructed, so only the default dtype differs between
// the two evaluate() calls below.
135 core::LayerParams layer_params;
137 evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
138 SolverOutput::get() << "Full int16 model qerror: " << int16_qerror << "\n";
141 evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
142 SolverOutput::get() << "Full uint8 model qerror: " << uint8_qerror << "\n";
143 _quantizer->set_hook(_hooks.get());
146 _hooks->on_begin_solver(module_path, uint8_qerror, int16_qerror);
// The search assumes int16 is at least as accurate as uint8; otherwise give up.
149 if (int16_qerror > uint8_qerror)
151 throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
// Target = int16 baseline plus _qerror_ratio of the gap between the two baselines.
154 _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
155 SolverOutput::get() << "Target qerror: " << _qerror << "\n";
// The plain uint8 model already meets the target.
157 if (uint8_qerror <= _qerror)
159 // no need for bisectioning just return Q8 model
160 if (!_quantizer->quantize(module.get(), "uint8", layer_params))
162 std::cerr << "ERROR: Failed to quantize model" << std::endl;
// Best configuration found so far; -1 means nothing has met the target yet.
168 float best_depth = -1;
169 float best_accuracy = -1;
170 core::LayerParams best_params;
// Only modules for which size() == 1 are handled; graph(0) is the one processed.
171 if (module->size() != 1)
173 throw std::runtime_error("Unsupported module");
175 auto graph = module->graph(0);
176 auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
177 // input and output nodes are not valid for quantization, so let's remove them
178 for (auto node : loco::input_nodes(graph))
180 active_nodes.erase(node);
182 for (auto node : loco::output_nodes(graph))
184 active_nodes.erase(node);
187 // let's decide whether nodes at input are more susceptible to be quantized into Q16, than at
// int16_front == true: nodes at depth <= cut become int16; false: nodes at depth >= cut do.
189 bool int16_front = true;
// Auto asks the VISQ-based estimate, using the middle of the depth range as the cut.
192 case Algorithm::Auto:
194 front_has_higher_error(nodes_depth, _visq_data_path, 0.5f * (max_depth + min_depth));
196 case Algorithm::ForceQ16Front:
197 SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
200 case Algorithm::ForceQ16Back:
201 SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
206 SolverOutput::get() << "\n";
212 _hooks->on_begin_iteration();
// Candidate cut: midpoint of the current depth segment, rounded down to an integer depth.
215 int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
// The cut did not move since the previous iteration (presumably the stop condition; the
// statement guarded by this check is not visible in this view).
217 if (last_depth == cut_depth)
222 SolverOutput::get() << "Looking for the optimal configuration in [" << min_depth << " , "
223 << max_depth << "] depth segment\n";
225 last_depth = cut_depth;
// Per-layer overrides for this candidate; this declaration shadows the baseline layer_params.
227 core::LayerParams layer_params;
228 for (auto &node : active_nodes)
230 auto cur_node = loco::must_cast<luci::CircleNode *>(node);
231 auto iter = nodes_depth.find(cur_node);
232 if (iter == nodes_depth.end())
234 continue; // to filter out nodes like weights
237 float depth = iter->second;
// Nodes on the int16 side of the cut get an explicit override; both comparisons are inclusive,
// so a node exactly at cut_depth is int16 in either direction.
239 if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
241 auto layer_param = std::make_shared<core::LayerParam>();
243 layer_param->name = cur_node->name();
244 layer_param->dtype = "int16";
245 layer_param->granularity = "channel";
248 layer_params.emplace_back(layer_param);
// Despite its name, cur_accuracy holds the value returned by evaluate(), which is compared
// against the target qerror below. "uint8" is passed as evaluate()'s def_quant.
252 float cur_accuracy = evaluate(evaluator, module_path, "uint8", layer_params);
256 _hooks->on_end_iteration(layer_params, "uint8", cur_accuracy);
// Target met: remember this configuration and move the bound so the int16 side can only shrink.
259 if (cur_accuracy < _qerror)
261 SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
262 << " < target qerror (" << _qerror << ")\n";
263 int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
264 best_params = layer_params;
265 best_depth = cut_depth;
266 best_accuracy = cur_accuracy;
// Target not met: move the opposite bound so the int16 side can only grow.
270 SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
271 << (cur_accuracy > _qerror ? " > " : " == ") << "target qerror ("
273 int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
279 _hooks->on_end_solver(best_params, "uint8", best_accuracy);
// Apply the best configuration found to the module loaded at the top of this function.
282 SolverOutput::get() << "Found the best configuration at depth " << best_depth << "\n";
283 if (!_quantizer->quantize(module.get(), "uint8", best_params))
285 std::cerr << "ERROR: Failed to quantize model" << std::endl;