# Shell/Python polyglot header: when this file is executed by a POSIX shell,
# the lines below resolve the script's real directory, locate the project's
# virtualenv interpreter, and re-exec this same file under it. When parsed by
# Python, each line is an inert triple-quoted string expression (the ''''…'''
# trick), so the Python interpreter skips straight past them.
# NOTE(review): content lines carry their original file line numbers ("2 ",
# "3 ", …) from a truncated extraction; the shebang (original line 1) is
# elided — confirm against the upstream file before editing.
2 ''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
3 ''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
# Only re-exec when the venv interpreter actually exists; otherwise fall
# through to the error message below.
4 ''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
5 ''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
8 # Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
10 # Licensed under the Apache License, Version 2.0 (the "License");
11 # you may not use this file except in compliance with the License.
12 # You may obtain a copy of the License at
14 # http://www.apache.org/licenses/LICENSE-2.0
16 # Unless required by applicable law or agreed to in writing, software
17 # distributed under the License is distributed on an "AS IS" BASIS,
18 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 # See the License for the specific language governing permissions and
20 # limitations under the License.
28 import onelib.utils as oneutils
29 from onelib.Command import Command
31 # TODO Find better way to suppress trackback on error
# Setting tracebacklimit to 0 hides Python tracebacks for uncaught
# exceptions so end users only see the error message itself.
32 sys.tracebacklimit = 0
# NOTE(review): this is the interior of _get_parser() — the `def` line
# (original ~line 35) and many argparse keyword lines (option flags, dest=,
# action=, type=, the `help=` openers before long help strings, and the
# closing `return parser`) are elided by truncation. Reconstruct from the
# upstream file before making code changes here.
36 parser = argparse.ArgumentParser(
37 description='command line tool to quantize circle model')
39 oneutils.add_default_arg(parser)
41 # input and output path.
43 '-i', '--input_path', type=str, help='full filepath of the input circle model')
49 'full filepath of the input data used for post-training quantization. if not specified, run with random input data.'
53 '--input_data_format',
56 'file format of input data. h5/hdf5 (default), list/filelist (a text file where a file path of input data is written in each line), or dir/directory (a directory where input data are saved)'
62 help='full filepath of the output quantized model')
64 # argument for profiling
67 '--generate_profile_data',
69 help='generate profiling data')
71 # save intermediate file(s)
73 '--save_intermediate',
75 help='Save intermediate files to output folder')
77 ## arguments for quantization
78 quantization_group = parser.add_argument_group('arguments for quantization')
80 quantization_group.add_argument(
84 'input model data type (supported: float32, default=float32). Deprecated (Use input_model_dtype)'
86 quantization_group.add_argument(
87 '--input_model_dtype',
89 help='input model data type (supported: float32, default=float32)')
90 quantization_group.add_argument(
93 help='data type of output quantized model (supported: uint8, int16, default=uint8)'
95 quantization_group.add_argument(
98 help='quantization granularity (supported: layer, channel, default=layer)')
99 quantization_group.add_argument(
103 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
105 quantization_group.add_argument(
109 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
111 quantization_group.add_argument(
115 'minimum percentile (0.0~100.0, default=1.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
117 quantization_group.add_argument(
121 'maximum percentile (0.0~100.0, default=99.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
123 quantization_group.add_argument(
124 '--moving_avg_batch',
127 'batch size of moving average (default=16). This is valid when calibration algorithm is moving_average.'
129 quantization_group.add_argument(
130 '--moving_avg_const',
133 'hyperparameter (C) to compute moving average (default=0.1). Update equation: avg <- avg + C * (curr_batch_avg - avg). This is valid when calibration algorithm is moving_average.'
135 quantization_group.add_argument(
139 "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max."
141 quantization_group.add_argument(
142 '--TF-style_maxpool',
145 "Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
147 quantization_group.add_argument(
148 '--quant_config', type=str, help="Path to the quantization configuration file.")
149 quantization_group.add_argument(
153 "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results."
155 quantization_group.add_argument(
156 '--test_data', type=str, help="Path to the test data used for evaluation.")
157 quantization_group.add_argument(
161 "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model."
163 quantization_group.add_argument(
167 "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model."
169 quantization_group.add_argument(
173 "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model."
175 quantization_group.add_argument(
176 '--print_top1_match',
179 "Print Top-1 match ratio of inference results between quantized model and fp32 model."
181 quantization_group.add_argument(
182 '--print_top5_match',
185 "Print Top-5 match ratio of inference results between quantized model and fp32 model."
187 quantization_group.add_argument(
191 "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model."
194 # arguments for force_quantparam option
195 force_quantparam_group = parser.add_argument_group(
196 'arguments for force_quantparam option')
198 force_quantparam_group.add_argument(
199 '--force_quantparam',
202 'overwrite quantparam (scale, zero_point) to the specified tensor in the quantized model.'
# --tensor_name/--scale/--zero_point use action='append' so they can be
# repeated; _verify_arg checks the three lists have equal length.
204 force_quantparam_group.add_argument(
205 '--tensor_name', type=str, action='append', help='tensor name (string)')
206 force_quantparam_group.add_argument(
207 '--scale', type=float, action='append', help='scale (float)')
208 force_quantparam_group.add_argument(
209 '--zero_point', type=int, action='append', help='zero point (int)')
211 # arguments for copy_quantparam option
212 copy_quantparam_group = parser.add_argument_group(
213 'arguments for copy_quantparam option')
215 copy_quantparam_group.add_argument(
218 help='copy quantparam (scale, zero_point) of a tensor to another tensor.')
219 copy_quantparam_group.add_argument(
220 '--src_tensor_name', type=str, action='append', help='tensor name (string)')
221 copy_quantparam_group.add_argument(
222 '--dst_tensor_name', type=str, action='append', help='tensor name (string)')
224 # arguments for fake_quant option
225 fake_quant_group = parser.add_argument_group('arguments for fake_quantize option')
227 fake_quant_group.add_argument(
230 help='convert quantized model to fake-quantized fp32 model.')
232 # arguments for requantize option
233 requantize_group = parser.add_argument_group('arguments for requantize option')
235 requantize_group.add_argument(
# NOTE(review): "uin8" in the help string below looks like a typo for
# "uint8" — cannot be fixed in a doc-only pass (runtime string); flag upstream.
238 help='convert quantized model to another-typed quantized model (ex: int8 -> uin8).'
241 # arguments for ampq option
242 ampq_quant_group = parser.add_argument_group('arguments for ampq option')
244 ampq_quant_group.add_argument(
245 '--ampq', action='store_true', help='quantize model using ampq solver.')
248 ampq_quant_group.add_argument(
249 '--ampq_qerror_ratio', type=str, help='quantization error ratio ([0, 1])')
252 ampq_quant_group.add_argument(
253 '--ampq_algorithm', type=str, help='type of algorithm (bisection)')
255 ampq_quant_group.add_argument(
256 '--bisection_type', type=str, help="one of 'auto', 'i16_front', 'i16_back'")
258 # ampq_bisection_visq
259 ampq_quant_group.add_argument(
260 '--ampq_bisection_visq',
262 help='.visq.json file path with quantization errors')
# Fill in default values for any quantization option the user did not supply.
# Defaults mirror the documented ones in the argparse help strings
# (quantized_dtype=uint8, granularity=layer, mode=percentile, etc.). When a
# --quant_config JSON file is given, its 'default_quantization_dtype' and
# 'default_granularity' keys override the built-in defaults.
267 def _set_default_values(args):
268 if not oneutils.is_valid_attr(args,
269 'input_model_dtype') and not oneutils.is_valid_attr(
270 args, 'input_dtype'):
271 setattr(args, 'input_model_dtype', 'float32')
272 if not oneutils.is_valid_attr(args, 'quantized_dtype'):
273 setattr(args, 'quantized_dtype', 'uint8')
274 if oneutils.is_valid_attr(args, 'quant_config'):
275 # Get quantized_dtype from qconfig file
# NOTE(review): a `try:` (orig. line 276) and `qconf = json.load(f)`
# (orig. line 278) appear elided here — the except clause below implies them.
277 with open(getattr(args, 'quant_config')) as f:
279 if 'default_quantization_dtype' in qconf:
280 setattr(args, 'quantized_dtype',
281 qconf['default_quantization_dtype'])
282 except json.decoder.JSONDecodeError:
# A malformed config is reported but not fatal; built-in defaults stand.
283 print('Failed to decode ' + getattr(args, 'quant_config') +
284 '. Please check it is a json file.')
285 if not oneutils.is_valid_attr(args, 'granularity'):
286 setattr(args, 'granularity', 'layer')
287 if oneutils.is_valid_attr(args, 'quant_config'):
288 # Get granularity from qconfig file
# NOTE(review): same elision here — `try:` / `qconf = json.load(f)`
# (orig. lines 289, 291) are missing from this view.
290 with open(getattr(args, 'quant_config')) as f:
292 if 'default_granularity' in qconf:
293 setattr(args, 'granularity', qconf['default_granularity'])
294 except json.decoder.JSONDecodeError:
295 print('Failed to decode ' + getattr(args, 'quant_config') +
296 '. Please check it is a json file.')
297 if not oneutils.is_valid_attr(args, 'mode'):
298 setattr(args, 'mode', 'percentile')
# Calibration parameters are kept as strings; they are forwarded verbatim
# to the record-minmax command line and validated in _verify_arg.
299 if not oneutils.is_valid_attr(args, 'min_percentile'):
300 setattr(args, 'min_percentile', '1.0')
301 if not oneutils.is_valid_attr(args, 'max_percentile'):
302 setattr(args, 'max_percentile', '99.0')
303 if not oneutils.is_valid_attr(args, 'moving_avg_batch'):
304 setattr(args, 'moving_avg_batch', '16')
305 if not oneutils.is_valid_attr(args, 'moving_avg_const'):
306 setattr(args, 'moving_avg_const', '0.1')
307 if not oneutils.is_valid_attr(args, 'ampq_algorithm'):
308 setattr(args, 'ampq_algorithm', 'bisection')
309 if not oneutils.is_valid_attr(args, 'bisection_type'):
310 setattr(args, 'bisection_type', 'auto')
# Pre-default-value validation: --requantize needs the source/target dtypes
# to be given explicitly, so check them before _set_default_values fills them.
313 def _verify_arg_pre(parser, args):
314 """verify given arguments before default values are set"""
315 # check if required arguments is given
# NOTE(review): a `missing = []` initializer (orig. line 316) appears
# elided here — the append calls below imply it.
317 if oneutils.is_valid_attr(args, 'requantize'):
318 if not oneutils.is_valid_attr(args,
319 'input_model_dtype') and not oneutils.is_valid_attr(
320 args, 'input_dtype'):
321 missing.append('--input_model_dtype')
322 if not oneutils.is_valid_attr(args, 'quantized_dtype'):
323 missing.append('--quantized_dtype')
# NOTE(review): a guard like `if len(missing) > 0:` (orig. line 324)
# appears elided before the parser.error call.
325 parser.error('the following arguments are required: ' + ' '.join(missing))
# Main argument validation, run after defaults are set: required paths,
# matched-length append-lists for force/copy quantparam, and numeric
# well-formedness of the calibration parameters.
328 def _verify_arg(parser, args):
329 """verify given arguments"""
330 # check if required arguments is given
# NOTE(review): `missing = []` (orig. line 331) appears elided here.
332 if not oneutils.is_valid_attr(args, 'input_path'):
333 missing.append('-i/--input_path')
334 if not oneutils.is_valid_attr(args, 'output_path'):
335 missing.append('-o/--output_path')
336 if oneutils.is_valid_attr(args, 'force_quantparam'):
337 if not oneutils.is_valid_attr(args, 'tensor_name'):
338 missing.append('--tensor_name')
339 if not oneutils.is_valid_attr(args, 'scale'):
340 missing.append('--scale')
341 if not oneutils.is_valid_attr(args, 'zero_point'):
342 missing.append('--zero_point')
343 if oneutils.is_valid_attr(args, 'copy_quantparam'):
344 if not oneutils.is_valid_attr(args, 'src_tensor_name'):
345 missing.append('--src_tensor_name')
346 if not oneutils.is_valid_attr(args, 'dst_tensor_name'):
347 missing.append('--dst_tensor_name')
# NOTE(review): `if len(missing) > 0:` (orig. line 348) appears elided.
349 parser.error('the following arguments are required: ' + ' '.join(missing))
# --tensor_name/--scale/--zero_point are parallel append-lists; every
# tensor needs exactly one scale and one zero point.
350 if oneutils.is_valid_attr(args, 'force_quantparam'):
351 tensors = getattr(args, 'tensor_name')
352 scales = getattr(args, 'scale')
353 zerops = getattr(args, 'zero_point')
354 if len(tensors) != len(scales) or len(tensors) != len(zerops):
# NOTE(review): the `parser.error(` opener (orig. line 355) is elided.
356 'The same number of tensor_name, scale, and zero_point should be given.')
357 if oneutils.is_valid_attr(args, 'copy_quantparam'):
358 src_tensors = getattr(args, 'src_tensor_name')
359 dst_tensors = getattr(args, 'dst_tensor_name')
360 if len(src_tensors) != len(dst_tensors):
# NOTE(review): the `parser.error(` opener (orig. line 361) is elided.
362 'The same number of src_tensor_name and dst_tensor_name should be given.')
364 # Check calibration parameters
# NOTE(review): the try/except wrappers around the float()/int()
# conversions below (orig. lines 367-368, 370, 372, 374, 378, 380, 382,
# 384, and the final `else:` at 386) are elided from this view.
365 if oneutils.is_valid_attr(args, 'mode'):
366 if getattr(args, 'mode') == 'percentile':
369 min_percentile = float(getattr(args, 'min_percentile'))
371 parser.error('min_percentile must be float')
373 max_percentile = float(getattr(args, 'max_percentile'))
375 parser.error('max_percentile must be float')
376 elif getattr(args, 'mode') == 'moving_average':
379 moving_avg_batch = int(getattr(args, 'moving_avg_batch'))
381 parser.error('moving_avg_batch must be integer')
383 moving_avg_const = float(getattr(args, 'moving_avg_const'))
385 parser.error('moving_avg_const must be float')
387 parser.error('Unsupported mode')
# Parse the command line; honor --version before any real work happens.
390 def _parse_arg(parser):
391 args = parser.parse_args()
# NOTE(review): an `if oneutils.is_valid_attr(args, 'version'):` guard and
# the trailing `return args` (orig. lines ~392-397) appear elided here.
394 oneutils.print_version_and_exit(__file__)
# Body of the main quantize driver (the enclosing `def` — presumably
# `_quantize(args)` — is elided from this view, as are the dispatch calls
# and early `return`s under each mode branch below, e.g. _ampq_solve /
# _write_qparam / _copy_qparam / _fake_quantize / _requantize).
# Flow for the default path: (1) run circle-quantizer with
# --quantize_dequantize_weights, (2) run record-minmax over the calibration
# data, (3) run circle-quantizer again with --quantize_with_minmax, and
# optionally (4) evaluate the result with circle-eval-diff.
400 if oneutils.is_valid_attr(args, 'ampq'):
404 if oneutils.is_valid_attr(args, 'force_quantparam'):
405 # write quantization parameters
409 if oneutils.is_valid_attr(args, 'copy_quantparam'):
410 # copy quantization parameters
414 if oneutils.is_valid_attr(args, 'fake_quantize'):
415 # fake-quantize model
419 if oneutils.is_valid_attr(args, 'requantize'):
424 # get file path to log
425 dir_path = os.path.dirname(os.path.realpath(__file__))
426 logfile_path = os.path.realpath(args.output_path) + '.log'
# Intermediate artifacts go to a temp dir unless --save_intermediate was
# given, in which case they are kept next to the output/log file.
428 with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
429 if oneutils.is_valid_attr(args, 'save_intermediate'):
430 tmpdir = os.path.dirname(logfile_path)
# The helper binaries are expected to sit next to this script.
432 circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
433 record_minmax_path = os.path.join(dir_path, 'record-minmax')
435 ## make a command to quantize and dequantize the weights of the model
436 circle_quantizer_cmd = [circle_quantizer_path]
438 if oneutils.is_valid_attr(args, 'verbose'):
439 circle_quantizer_cmd.append('--verbose')
440 # quantize_dequantize_weights
441 circle_quantizer_cmd.append('--quantize_dequantize_weights')
442 # Use input_model_dtype if it exists. Use input_dtype otherwise.
443 if oneutils.is_valid_attr(args, 'input_model_dtype'):
444 circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
445 elif oneutils.is_valid_attr(args, 'input_dtype'):
446 circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
447 if oneutils.is_valid_attr(args, 'quantized_dtype'):
448 circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
449 if oneutils.is_valid_attr(args, 'granularity'):
450 circle_quantizer_cmd.append(getattr(args, 'granularity'))
451 if oneutils.is_valid_attr(args, 'quant_config'):
452 # NOTE --config conflicts with --config option in onecc, so
453 # we use quant_config for one-quantize
454 circle_quantizer_cmd.append('--config')
455 circle_quantizer_cmd.append(getattr(args, 'quant_config'))
456 # input and output path
457 if oneutils.is_valid_attr(args, 'input_path'):
458 circle_quantizer_cmd.append(getattr(args, 'input_path'))
# NOTE(review): the `tmpdir,` argument line of this os.path.join
# (orig. line 460) appears elided.
459 tmp_weights_fake_quant_path = os.path.join(
461 os.path.splitext(os.path.basename(
462 args.input_path))[0]) + '.weights_fake_quant.circle'
463 circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
465 if oneutils.is_valid_attr(args, 'generate_profile_data'):
466 circle_quantizer_cmd.append('--generate_profile_data')
# Echo the full command line into the .log file before running it.
468 f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
470 # run circle-quantizer
471 oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
# NOTE(review): the `tmpdir,` argument (orig. line 474) appears elided.
473 tmp_minmax_recorded_path = os.path.join(
475 os.path.splitext(os.path.basename(
476 args.input_path))[0]) + '.minmax_recorded.circle'
478 ## make a command to record min-max value of each tensor while running the representative dataset
479 record_minmax_cmd = Command(record_minmax_path, args, f)
# NOTE(review): the terminating `.run()` of this builder chain
# (orig. line ~491) appears elided.
480 record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
481 .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \
482 .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
483 .add_option_with_valid_args('--input_data', ['input_data']) \
484 .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
485 .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
486 .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
487 .add_option_with_valid_args('--moving_avg_batch', ['moving_avg_batch']) \
488 .add_option_with_valid_args('--moving_avg_const', ['moving_avg_const']) \
489 .add_option_with_valid_args('--mode', ['mode']) \
490 .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
493 ## make a second command to quantize the model using the embedded information
494 circle_quantizer_cmd = [circle_quantizer_path]
496 if oneutils.is_valid_attr(args, 'verbose'):
497 circle_quantizer_cmd.append('--verbose')
498 # quantize_dequantize_weights
499 circle_quantizer_cmd.append('--quantize_with_minmax')
500 # Use input_model_dtype if it exists. Use input_dtype otherwise.
501 if oneutils.is_valid_attr(args, 'input_model_dtype'):
502 circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
503 elif oneutils.is_valid_attr(args, 'input_dtype'):
504 circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
505 if oneutils.is_valid_attr(args, 'quantized_dtype'):
506 circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
507 if oneutils.is_valid_attr(args, 'granularity'):
508 circle_quantizer_cmd.append(getattr(args, 'granularity'))
509 if oneutils.is_valid_attr(args, 'TF-style_maxpool'):
510 circle_quantizer_cmd.append('--TF-style_maxpool')
511 if oneutils.is_valid_attr(args, 'input_type'):
512 circle_quantizer_cmd.append('--input_type')
513 circle_quantizer_cmd.append(getattr(args, 'input_type'))
514 if oneutils.is_valid_attr(args, 'output_type'):
515 circle_quantizer_cmd.append('--output_type')
516 circle_quantizer_cmd.append(getattr(args, 'output_type'))
517 if oneutils.is_valid_attr(args, 'quant_config'):
518 # NOTE --config conflicts with --config option in onecc, so
519 # we use quant_config for one-quantize
520 circle_quantizer_cmd.append('--config')
521 circle_quantizer_cmd.append(getattr(args, 'quant_config'))
522 # input and output path
523 circle_quantizer_cmd.append(tmp_minmax_recorded_path)
524 if oneutils.is_valid_attr(args, 'output_path'):
525 circle_quantizer_cmd.append(getattr(args, 'output_path'))
527 if oneutils.is_valid_attr(args, 'generate_profile_data'):
528 circle_quantizer_cmd.append('--generate_profile_data')
530 f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
532 # run circle-quantizer
533 oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
# Optional accuracy check: fake-quantize the result back to fp32 and diff
# its inference outputs against the original fp32 model.
536 if oneutils.is_valid_attr(args, 'evaluate_result'):
537 circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
539 if oneutils.is_valid_attr(args, 'output_path'):
540 quant_model = getattr(args, 'output_path')
# NOTE(review): the `tmpdir,` argument (orig. line 542) appears elided.
541 tmp_fake_quant_model = os.path.join(
543 os.path.splitext(os.path.basename(
544 args.input_path))[0]) + '.fake_quant.circle'
546 # do fake quantization
547 fake_quantize_cmd = Command(circle_quantizer_path, args, f)
# NOTE(review): `.run()` terminator (orig. line ~550) appears elided.
548 fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
549 .add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \
552 # compare fake-quant model and fp32 model
553 circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f)
# NOTE(review): `.run()` terminator (orig. line ~565) appears elided.
554 circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \
555 .add_option_with_values('--second_model', [tmp_fake_quant_model]) \
556 .add_option_with_valid_args('--first_input_data', ['test_data']) \
557 .add_option_with_valid_args('--second_input_data', ['test_data']) \
558 .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
559 .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \
560 .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \
561 .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \
562 .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \
563 .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \
564 .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \
# Handler for --force_quantparam: overwrite (scale, zero_point) on the named
# tensors of an already-quantized model by invoking circle-quantizer with one
# --force_quantparam triple per tensor.
568 def _write_qparam(args):
569 # get file path to log
570 dir_path = os.path.dirname(os.path.realpath(__file__))
571 logfile_path = os.path.realpath(args.output_path) + '.log'
573 with open(logfile_path, 'wb') as f:
575 circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
577 # make a command to write qparams to the tensors
578 circle_quantizer_cmd = [circle_quantizer_path]
580 if oneutils.is_valid_attr(args, 'verbose'):
581 circle_quantizer_cmd.append('--verbose')
# The three append-lists are guaranteed equal-length by _verify_arg, so
# zip() drops nothing here.
582 if oneutils.is_valid_attr(args, 'tensor_name'):
583 tensor_name = getattr(args, 'tensor_name')
584 if oneutils.is_valid_attr(args, 'scale'):
585 scale = getattr(args, 'scale')
586 if oneutils.is_valid_attr(args, 'zero_point'):
587 zero_point = getattr(args, 'zero_point')
588 for (t, s, zp) in zip(tensor_name, scale, zero_point):
589 circle_quantizer_cmd.append('--force_quantparam')
590 circle_quantizer_cmd.append(t)
591 circle_quantizer_cmd.append(str(s))
592 circle_quantizer_cmd.append(str(zp))
593 # input and output path
594 if oneutils.is_valid_attr(args, 'input_path'):
595 circle_quantizer_cmd.append(getattr(args, 'input_path'))
596 if oneutils.is_valid_attr(args, 'output_path'):
597 circle_quantizer_cmd.append(getattr(args, 'output_path'))
# Echo the command into the .log file, then execute it.
599 f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
601 # run circle-quantizer
602 oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
# Handler for --copy_quantparam: copy (scale, zero_point) from each source
# tensor to the corresponding destination tensor via circle-quantizer's
# --copy_quantparam option (one src/dst pair per occurrence).
605 def _copy_qparam(args):
606 # get file path to log
607 dir_path = os.path.dirname(os.path.realpath(__file__))
608 logfile_path = os.path.realpath(args.output_path) + '.log'
610 with open(logfile_path, 'wb') as f:
612 circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
614 # make a command to write qparams to the tensors
615 circle_quantizer_cmd = [circle_quantizer_path]
617 if oneutils.is_valid_attr(args, 'verbose'):
618 circle_quantizer_cmd.append('--verbose')
# src/dst lists are verified equal-length in _verify_arg.
619 if oneutils.is_valid_attr(args, 'src_tensor_name'):
620 src_tensor_name = getattr(args, 'src_tensor_name')
621 if oneutils.is_valid_attr(args, 'dst_tensor_name'):
622 dst_tensor_name = getattr(args, 'dst_tensor_name')
623 for (src, dst) in zip(src_tensor_name, dst_tensor_name):
624 circle_quantizer_cmd.append('--copy_quantparam')
625 circle_quantizer_cmd.append(src)
626 circle_quantizer_cmd.append(dst)
627 # input and output path
628 if oneutils.is_valid_attr(args, 'input_path'):
629 circle_quantizer_cmd.append(getattr(args, 'input_path'))
630 if oneutils.is_valid_attr(args, 'output_path'):
631 circle_quantizer_cmd.append(getattr(args, 'output_path'))
# Echo the command into the .log file, then execute it.
633 f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
635 # run circle-quantizer
636 oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
# Handler for --fake_quantize: convert a quantized circle model back to a
# fake-quantized fp32 model (input_path -> output_path) via circle-quantizer.
639 def _fake_quantize(args):
640 # get file path to log
641 dir_path = os.path.dirname(os.path.realpath(__file__))
642 logfile_path = os.path.realpath(args.output_path) + '.log'
644 with open(logfile_path, 'wb') as f:
646 circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
647 q_model = getattr(args, 'input_path')
648 fq_model = getattr(args, 'output_path')
650 # do fake quantization
651 fake_quantize_cmd = Command(circle_quantizer_path, args, f)
# NOTE(review): the terminating `.run()` of this builder chain
# (orig. line ~654) appears elided from this view.
652 fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
653 .add_option_with_values('--fake_quantize', [q_model, fq_model]) \
# Handler for --ampq: automatic mixed-precision quantization. Records
# min/max with record-minmax, optionally produces a .visq.json error map
# (building a q8 reference model with circle-quantizer + visq when the
# bisection algorithm needs one and none was supplied), then runs
# circle-mpqsolver to pick per-layer precisions.
657 def _ampq_solve(args):
658 # get file path to log
659 dir_path = os.path.dirname(os.path.realpath(__file__))
660 logfile_path = os.path.realpath(args.output_path) + '.log'
# Intermediates go to a temp dir unless --save_intermediate keeps them
# beside the log file.
662 with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
663 if oneutils.is_valid_attr(args, 'save_intermediate'):
664 tmpdir = os.path.dirname(logfile_path)
667 record_minmax_path = os.path.join(dir_path, 'record-minmax')
# NOTE(review): the `tmpdir,` argument of this os.path.join
# (orig. line 670) appears elided.
669 tmp_minmax_recorded_path = os.path.join(
671 os.path.splitext(os.path.basename(
672 args.input_path))[0]) + '.minmax_recorded.circle'
674 ## make a command to record min-max value of each tensor while running the representative dataset
675 record_minmax_cmd = Command(record_minmax_path, args, f)
# NOTE(review): the terminating `.run()` (orig. line ~687) appears elided.
676 record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
677 .add_option_with_valid_args('--input_model', ['input_path']) \
678 .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
679 .add_option_with_valid_args('--input_data', ['input_data']) \
680 .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
681 .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
682 .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
683 .add_option_with_valid_args('--moving_avg_batch', ['moving_avg_batch']) \
684 .add_option_with_valid_args('--moving_avg_const', ['moving_avg_const']) \
685 .add_option_with_valid_args('--mode', ['mode']) \
686 .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
689 # process visq if needed
# NOTE(review): an initializer such as `visq_file = None` (orig. line 690)
# appears elided — line 698 reads visq_file on the non-visq path.
691 if oneutils.is_valid_attr(args, 'ampq_bisection_visq'):
692 visq_file = getattr(args, 'ampq_bisection_visq')
694 if (oneutils.is_valid_attr(args, 'ampq_algorithm')
695 and oneutils.is_valid_attr(args, 'bisection_type')):
696 algorithm = getattr(args, 'ampq_algorithm')
697 bisection_type = getattr(args, 'bisection_type')
698 if algorithm == 'bisection' and bisection_type == 'auto' and visq_file is None:
699 # algorithm needs bisection but no file in input configuration
701 # to compute visq file we need q8 quantized model
# NOTE(review): `tmpdir,` argument (orig. line 703) appears elided.
702 q8_file = os.path.join(
704 os.path.splitext(os.path.basename(
705 args.input_path))[0]) + '.visq.q8.circle'
# Build a uint8 per-channel reference model from the minmax-recorded
# model; visq will compare it against the fp32 model below.
708 circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
709 circle_quantizer_cmd = [circle_quantizer_path]
711 if oneutils.is_valid_attr(args, 'verbose'):
712 circle_quantizer_cmd.append('--verbose')
713 circle_quantizer_cmd.append('--quantize_with_minmax')
714 circle_quantizer_cmd.append('float32')
715 circle_quantizer_cmd.append('uint8')
716 circle_quantizer_cmd.append('channel')
718 if oneutils.is_valid_attr(args, 'TF-style_maxpool'):
719 circle_quantizer_cmd.append('--TF-style_maxpool')
721 circle_quantizer_cmd.extend(['--input_type', 'uint8'])
722 circle_quantizer_cmd.extend(['--output_type', 'uint8'])
724 # input and output paths
725 circle_quantizer_cmd.append(tmp_minmax_recorded_path)
726 circle_quantizer_cmd.append(q8_file)
728 f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
730 # run circle-quantizer
# NOTE(review): the `oneutils.run(` opener (orig. line 731) is elided.
732 circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
735 visq_path = os.path.join(dir_path, 'visq')
# NOTE(review): `tmpdir,` argument (orig. line 738) appears elided.
737 visq_file = os.path.join(
739 os.path.splitext(os.path.basename(
740 args.input_path))[0]) + '.tae.visq.json'
742 visq_cmd = [visq_path]
743 visq_cmd.extend(['--fp32_circle', getattr(args, 'input_path')])
744 visq_cmd.extend(['--data', getattr(args, 'input_data')])
745 visq_cmd.extend(['--q_circle', q8_file])
746 visq_cmd.extend(['--tae_output', visq_file])
747 visq_cmd.extend(['--batch_size', "1"])
748 visq_cmd.append('--dump_dot_graph')
749 f.write((' '.join(visq_cmd) + '\n').encode())
752 oneutils.run(visq_cmd, err_prefix="visq", logfile=f)
755 circle_mpqsolver_path = os.path.join(dir_path, 'circle-mpqsolver')
757 # solve for Mixed Precision Quantization configuration
758 ampq_quantize_cmd = [circle_mpqsolver_path]
# Forward calibration data and solver parameters to circle-mpqsolver.
761 if oneutils.is_valid_attr(args, 'input_data'):
762 ampq_quantize_cmd.extend(['--data', getattr(args, 'input_data')])
765 if oneutils.is_valid_attr(args, 'input_data_format'):
766 ampq_quantize_cmd.extend(
767 ['--data_format', getattr(args, 'input_data_format')])
770 if oneutils.is_valid_attr(args, 'ampq_qerror_ratio'):
771 ampq_quantize_cmd.extend(
772 ['--qerror_ratio', getattr(args, 'ampq_qerror_ratio')])
# Map the user-facing bisection_type to the solver's --bisection flag:
# 'auto' -> auto, 'i16_front' -> true, 'i16_back' -> false.
775 if oneutils.is_valid_attr(args, 'ampq_algorithm'):
776 algorithm = getattr(args, 'ampq_algorithm')
777 if algorithm == 'bisection':
778 if oneutils.is_valid_attr(args, 'bisection_type'):
779 bisection_type = getattr(args, 'bisection_type')
780 if bisection_type == 'auto':
781 ampq_quantize_cmd.extend(['--bisection', 'auto'])
782 elif bisection_type == 'i16_front':
783 ampq_quantize_cmd.extend(['--bisection', 'true'])
784 elif bisection_type == 'i16_back':
785 ampq_quantize_cmd.extend(['--bisection', 'false'])
787 # recorded model as input
788 ampq_quantize_cmd.extend(['--input_model', tmp_minmax_recorded_path])
791 if oneutils.is_valid_attr(args, 'input_type'):
792 ampq_quantize_cmd.extend(['--input_dtype', getattr(args, 'input_type')])
795 if oneutils.is_valid_attr(args, 'output_type'):
796 ampq_quantize_cmd.extend(['--output_dtype', getattr(args, 'output_type')])
799 if oneutils.is_valid_attr(args, 'output_path'):
800 ampq_quantize_cmd.extend(['--output_model', getattr(args, 'output_path')])
803 if not (visq_file is None):
804 ampq_quantize_cmd.extend(['--visq_file', visq_file])
807 if oneutils.is_valid_attr(args, 'save_intermediate'):
808 intermediate_dir = os.path.dirname(logfile_path)
809 ampq_quantize_cmd.extend(['--save_intermediate', intermediate_dir])
811 if oneutils.is_valid_attr(args, 'verbose'):
812 ampq_quantize_cmd.append('--verbose')
814 f.write((' '.join(ampq_quantize_cmd) + '\n').encode())
817 oneutils.run(ampq_quantize_cmd, err_prefix="circle_mpqsolver", logfile=f)
# Handler for --requantize: convert an already-quantized model to another
# quantized dtype (e.g. int8 -> uint8) via circle-quantizer's --requantize.
820 def _requantize(args):
821 # get file path to log
822 dir_path = os.path.dirname(os.path.realpath(__file__))
823 logfile_path = os.path.realpath(args.output_path) + '.log'
825 with open(logfile_path, 'wb') as f:
827 circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
829 ## make a command to quantize and dequantize the weights of the model
830 circle_quantizer_cmd = [circle_quantizer_path]
832 if oneutils.is_valid_attr(args, 'verbose'):
833 circle_quantizer_cmd.append('--verbose')
835 circle_quantizer_cmd.append('--requantize')
836 # Use input_model_dtype if it exists. Use input_dtype otherwise.
837 if oneutils.is_valid_attr(args, 'input_model_dtype'):
838 circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
839 elif oneutils.is_valid_attr(args, 'input_dtype'):
840 circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
841 if oneutils.is_valid_attr(args, 'quantized_dtype'):
842 circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
843 # input and output path
844 if oneutils.is_valid_attr(args, 'input_path'):
845 circle_quantizer_cmd.append(getattr(args, 'input_path'))
846 if oneutils.is_valid_attr(args, 'output_path'):
847 circle_quantizer_cmd.append(getattr(args, 'output_path'))
# Echo the command into the .log file, then execute it.
849 f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
851 # run circle-quantizer
852 oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
# Body of main() (the `def main():` line, orig. ~856, is elided from this
# view, as is the final dispatch — presumably `_quantize(args)` — after
# verification). Order matters: cfg parsing fills args, pre-verification
# runs before defaults, then defaults, then full verification.
857 parser = _get_parser()
858 args = _parse_arg(parser)
860 # parse configuration file
861 oneutils.parse_cfg(args.config, 'one-quantize', args)
863 # verify arguments before default value setting
864 _verify_arg_pre(parser, args)
867 _set_default_values(args)
870 _verify_arg(parser, args)
# Entry point: safemain wraps main() with the project's uniform error
# reporting/exit handling.
876 if __name__ == '__main__':
877 oneutils.safemain(main, __file__)