#include "BisectionSolver.h"
#include "DepthParameterizer.h"
-#include "ErrorMetric.h"
-#include "ErrorApproximator.h"
+#include "VISQErrorApproximator.h"
+
+#include <core/ErrorMetric.h>
+#include <core/SolverOutput.h>
#include <luci/ImporterEx.h>
-#include <luci/Log.h>
#include <cmath>
#include <iostream>
namespace
{
-bool error_at_input_is_larger_than_at_output(const NodeDepthType &nodes_depth, float cut_depth)
+/**
+ * @brief Compare errors of two disjoint subsets of a model sliced by cut_depth
+ * @param nodes_depth (node, depth) entries; each node's name() is handed to the approximator
+ * @param visq_path Path passed to VISQErrorApproximator::init
+ * @param cut_depth Depth threshold; entries with depth < cut_depth are counted as the front part
+ * @return True if the front part (< cut_depth) has larger errors than the rear part (>= cut_depth)
+ */
+bool front_has_higher_error(const NodeDepthType &nodes_depth, const std::string &visq_path,
+ float cut_depth)
{
- LOGGER(l);
+ SolverOutput::get() << "\n>> Running bisection(auto) algorithm\n";
+
+ VISQErrorApproximator approximator;
+ approximator.init(visq_path); // presumably loads per-node error data from visq_path - confirm
float error_at_input = 0;
float error_at_output = 0;
for (auto &iter : nodes_depth)
{
- float cur_error = approximate(iter.first);
+ float cur_error = approximator.approximate(iter.first->name()); // lookup is keyed by node name
if (iter.second < cut_depth)
{
error_at_input += cur_error;
}
}
// NOTE(review): no accumulation into error_at_output is visible in this fragment; confirm the
// loop has an else branch adding cur_error to it, otherwise the rear-half sum stays 0.
+ SolverOutput::get() << "Qerror of front half: " << error_at_input << "\n";
+ SolverOutput::get() << "Qerror of rear half: " << error_at_output << "\n";
if (error_at_input > error_at_output)
{
- VERBOSE(l, 0) << "Q16 will be set at input due to ";
+ SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
}
else
{
- VERBOSE(l, 0) << "Q8 will be set at input due to ";
+ SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
}
- VERBOSE(l, 0) << error_at_input << " error at input vs ";
- VERBOSE(l, 0) << error_at_output << " error at output." << std::endl;
return error_at_input > error_at_output; // same comparison that selected the message above
}
const std::string &output_quantization)
: MPQSolver(input_data_path, qerror_ratio, input_quantization, output_quantization)
{
- _quantizer = std::make_unique<Quantizer>(_input_quantization, _output_quantization);
+ _quantizer = std::make_unique<core::Quantizer>(_input_quantization, _output_quantization);
}
-float BisectionSolver::evaluate(const DatasetEvaluator &evaluator, const std::string &flt_path,
- const std::string &def_quant, LayerParams &layers)
+float BisectionSolver::evaluate(const core::DatasetEvaluator &evaluator,
+ const std::string &flt_path, const std::string &def_quant,
+ core::LayerParams &layers)
{
auto model = read_module(flt_path);
// get fake quantized model for evaluation
/**
 * @brief Store the requested bisection Algorithm in _algorithm
 */
void BisectionSolver::algorithm(Algorithm algorithm)
{
  _algorithm = algorithm;
}
+void BisectionSolver::setVisqPath(const std::string &visq_path) { _visq_data_path = visq_path; } // stored path is later passed to front_has_higher_error
+
std::unique_ptr<luci::Module> BisectionSolver::run(const std::string &module_path)
{
- LOGGER(l);
-
auto module = read_module(module_path);
float min_depth = 0.f;
return nullptr;
}
- std::unique_ptr<MAEMetric> metric = std::make_unique<MAEMetric>();
- DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
+ SolverOutput::get() << "\n>> Computing baseline qerrors\n";
+
+ std::unique_ptr<core::MAEMetric> metric = std::make_unique<core::MAEMetric>();
+ core::DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
- LayerParams layer_params;
+ core::LayerParams layer_params;
float int16_qerror =
evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
- VERBOSE(l, 0) << "Full int16 model quantization error " << int16_qerror << std::endl;
+ SolverOutput::get() << "Full int16 model qerror: " << int16_qerror << "\n";
float uint8_qerror =
evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
- VERBOSE(l, 0) << "Full uint8 model quantization error " << uint8_qerror << std::endl;
+ SolverOutput::get() << "Full uint8 model qerror: " << uint8_qerror << "\n";
+ _quantizer->set_hook(_hooks.get());
+ if (_hooks)
+ {
+ _hooks->on_begin_solver(module_path, uint8_qerror, int16_qerror);
+ }
if (int16_qerror > uint8_qerror)
{
}
_qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
- VERBOSE(l, 0) << "Target quantization error " << _qerror << std::endl;
+ SolverOutput::get() << "Target qerror: " << _qerror << "\n";
if (uint8_qerror <= _qerror)
{
int last_depth = -1;
float best_depth = -1;
- LayerParams best_params;
+ float best_accuracy = -1;
+ core::LayerParams best_params;
if (module->size() != 1)
{
throw std::runtime_error("Unsupported module");
{
case Algorithm::Auto:
int16_front =
- error_at_input_is_larger_than_at_output(nodes_depth, 0.5f * (max_depth + min_depth));
+ front_has_higher_error(nodes_depth, _visq_data_path, 0.5f * (max_depth + min_depth));
break;
case Algorithm::ForceQ16Front:
+ SolverOutput::get() << "Front part will be Q16, while the rear will be Q8\n";
int16_front = true;
break;
case Algorithm::ForceQ16Back:
- int16_front = true;
+ SolverOutput::get() << "Front part will be Q8, while the rear will be Q16\n";
+ int16_front = false;
break;
}
+ SolverOutput::get() << "\n";
+
while (true)
{
+ if (_hooks)
+ {
+ _hooks->on_begin_iteration();
+ }
+
int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
if (last_depth == cut_depth)
{
break;
}
+
+ SolverOutput::get() << "Looking for the optimal configuration in [" << min_depth << " , "
+ << max_depth << "] depth segment\n";
+
last_depth = cut_depth;
- LayerParams layer_params;
+ core::LayerParams layer_params;
for (auto &node : active_nodes)
{
auto cur_node = loco::must_cast<luci::CircleNode *>(node);
if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
{
- auto layer_param = std::make_shared<LayerParam>();
+ auto layer_param = std::make_shared<core::LayerParam>();
{
layer_param->name = cur_node->name();
layer_param->dtype = "int16";
}
float cur_accuracy = evaluate(evaluator, module_path, "uint8", layer_params);
- VERBOSE(l, 0) << cut_depth << " : " << cur_accuracy << std::endl;
+
+ if (_hooks)
+ {
+ _hooks->on_end_iteration(layer_params, "uint8", cur_accuracy);
+ }
if (cur_accuracy < _qerror)
{
+ SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
+ << " < target qerror (" << _qerror << ")\n";
int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
best_params = layer_params;
best_depth = cut_depth;
+ best_accuracy = cur_accuracy;
}
else
{
+ SolverOutput::get() << "Qerror at depth " << cut_depth << " is " << cur_accuracy
+ << (cur_accuracy > _qerror ? " > " : " == ") << "target qerror ("
+ << _qerror << ")\n";
int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
}
}
- VERBOSE(l, 0) << "Found the best configuration at " << best_depth << " depth." << std::endl;
+ if (_hooks)
+ {
+ _hooks->on_end_solver(best_params, "uint8", best_accuracy);
+ }
+
+ SolverOutput::get() << "Found the best configuration at depth " << best_depth << "\n";
if (!_quantizer->quantize(module.get(), "uint8", best_params))
{
std::cerr << "ERROR: Failed to quantize model" << std::endl;