1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
// Numeric limits used when choosing quantization scale factors.
// NOTE(review): purpose of MAX_OUT_MULTIPLIER (230) is not evident from this
// chunk — presumably an upper bound on the output scale multiplier; confirm
// against the .cpp that uses it.
13 #define MAX_OUT_MULTIPLIER 230
// Largest magnitude for an 8-bit ("1 byte") quantized weight (int8 max).
14 #define MAX_VAL_1B_WEIGHT 127
// Target maximum for 16-bit ("2 byte") quantized weights: 2^14, presumably
// leaving headroom below the int16 limit of 32767 — confirm.
15 #define MAX_VAL_2B_WEIGHT 16384
// Target maximum for 16-bit quantized feature/activation values (also 2^14).
16 #define MAX_VAL_2B_FEAT 16384
// Quantization warning logger: prints to stderr in debug builds, compiles to
// nothing otherwise.
// NOTE(review): the two definitions below are mutually exclusive; the gap in
// the surrounding line numbering (original lines 17/19/21 not visible here)
// suggests they are selected by an #ifdef DEBUG / #else / #endif guard that
// falls outside this chunk. Confirm before editing — two unconditional
// definitions would be a macro redefinition.
18 #define QUANTWARNING(...) (fprintf(stderr, __VA_ARGS__))
20 #define QUANTWARNING(...)
// Quantizes an affine (fully connected) layer to 16-bit weights and 32-bit
// biases. Reads float weights/biases, writes the quantized integers, and
// reports the scale factors it used via the out-pointers: the weight scale
// factor is computed from the data (output parameter), and the output scale
// factor presumably combines it with input_scale_factor — confirm in the
// implementation.
// NOTE(review): two parameter lines (original lines 30-31, presumably the
// unpadded num_rows / num_columns) are not visible in this chunk; the
// declaration below is incomplete as shown.
23 void QuantizeAffine16(float *ptr_float_weights,
24                       float *ptr_float_biases,
25                       int16_t *ptr_int_weights,
26                       int32_t *ptr_int_biases,
27                       float input_scale_factor,
28                       float *ptr_weight_scale_factor,
29                       float *ptr_output_scale_factor,
32                       uint32_t num_rows_padded,
33                       uint32_t num_columns_padded);
// Variant of QuantizeAffine16 that uses a caller-supplied (fixed) weight
// scale factor instead of computing one: note weight_scale_factor is passed
// by value here, not as an out-pointer. Only the output scale factor is
// written back.
// NOTE(review): original lines 41-42 (presumably num_rows / num_columns) are
// not visible in this chunk; the declaration below is incomplete as shown.
34 void FixedQuantizeAffine16(float *ptr_float_weights,
35                            float *ptr_float_biases,
36                            int16_t *ptr_int_weights,
37                            int32_t *ptr_int_biases,
38                            float input_scale_factor,
39                            float weight_scale_factor,
40                            float *ptr_output_scale_factor,
43                            uint32_t num_rows_padded,
44                            uint32_t num_columns_padded);
// Computes a quantization scale factor so that the data's maximum magnitude
// maps to target_max (presumably target_max / max(|x|) — confirm in the
// implementation). Three overloads:
//  - raw float buffer of num_elements;
//  - a batch given as vector-of-vectors;
//  - a third vector-of-vectors overload whose remaining parameters (original
//    lines 48-50) are not visible in this chunk — declaration incomplete as
//    shown.
45 float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements);
46 float ScaleFactorForQuantization(std::vector<std::vector<float>> &input_vectors, float target_max);
47 float ScaleFactorForQuantization(std::vector<std::vector<float>> &input_vectors,
// Quantizes float data to int16 by multiplying with scale_factor (presumably
// with rounding/saturation — confirm in the implementation). Two overloads:
//  - raw float buffer of num_elements;
//  - vector-of-vectors batch with grouping; original lines 54 and 56 of that
//    overload are not visible in this chunk — declaration incomplete as
//    shown.
51 void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor);
52 void QuantizeVector16(std::vector<std::vector<float>> &input_vectors,
53                       int16_t *ptr_int_memory,
55                       uint32_t num_group_size,
// Rescales already-quantized int16 data in place from prev_scale_factor to
// scale_factor (presumably value * scale_factor / prev_scale_factor —
// confirm in the implementation).
57 void ReQuantizeVector16(int16_t *ptr_int_memory, uint32_t num_elements, float prev_scale_factor, float scale_factor);
// Verifies a 16-bit quantized affine layer against its float originals using
// the given scale factors; returns true on success (tolerance criteria live
// in the implementation — confirm there).
// NOTE(review): original lines 64-65 (presumably num_rows / num_columns) are
// not visible in this chunk; the declaration below is incomplete as shown.
58 bool IntegrityCheckAffine16(float *ptr_float_weights,
59                             float *ptr_float_biases,
60                             int16_t *ptr_int_weights,
61                             int32_t *ptr_int_biases,
62                             float weight_scale_factor,
63                             float output_scale_factor,
66                             uint32_t num_rows_padded,
67                             uint32_t num_columns_padded);
// Weights-only variant of IntegrityCheckAffine16 (no bias arrays); returns
// true when the int16 weights are consistent with the float weights at
// weight_scale_factor.
// NOTE(review): original lines 71-72 (presumably num_rows / num_columns) are
// not visible in this chunk; the declaration below is incomplete as shown.
68 bool IntegrityCheckAffineWeights16(float *ptr_float_weights,
69                                    int16_t *ptr_int_weights,
70                                    float weight_scale_factor,
73                                    uint32_t num_rows_padded,
74                                    uint32_t num_columns_padded);
// Quantizes biases alone to int32, given the input and weight scale factors;
// writes the resulting output scale factor through ptr_output_scale_factor.
// NOTE(review): the final parameter line (original line 80, presumably
// uint32_t num_rows) is not visible in this chunk; the declaration below is
// incomplete as shown.
75 void QuantizeBias16(float *ptr_float_biases,
76                     int32_t *ptr_int_biases,
77                     float input_scale_factor,
78                     float weight_scale_factor,
79                     float *ptr_output_scale_factor,
// Converts quantized data back to float into float_vector (presumably
// value / scale_factor — confirm in the implementation). Overloads for int16
// and int32 sources, plus a grouped int32 overload whose remaining parameter
// lines (original lines 85 and onward) are not visible in this chunk —
// declaration incomplete as shown.
81 void DeQuantizeVector16(int16_t *ptr_int_memory, std::vector<float> &float_vector, float scale_factor);
82 void DeQuantizeVector32(int32_t *ptr_int_memory, std::vector<float> &float_vector, float scale_factor);
83 void DeQuantizeVector32(int32_t *ptr_int_memory,
84                         std::vector<float> &float_vector,
86                         uint32_t num_group_size,
// 8-bit counterpart of QuantizeAffine16: quantizes an affine layer to int8
// weights with intel_compound_bias_t biases (a project/GNA type declared
// elsewhere — presumably carrying a per-row bias plus weight multiplier;
// confirm against the GNA API headers). Computes and returns the weight and
// output scale factors through the out-pointers. Takes both the used
// (num_rows/num_columns) and padded dimensions.
91 void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
92                      float input_scale_factor, float *ptr_weight_scale_factor, float *ptr_output_scale_factor,
93                      uint32_t num_rows, uint32_t num_columns, uint32_t num_rows_padded, uint32_t num_columns_padded);
// 8-bit counterpart of QuantizeBias16: quantizes biases alone into
// intel_compound_bias_t records using the given input and weight scale
// factors, and writes the resulting output scale factor back through
// ptr_output_scale_factor.
94 void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t *ptr_int_biases, float input_scale_factor,
95                    float weight_scale_factor, float *ptr_output_scale_factor, uint32_t num_rows);
// 8-bit counterpart of IntegrityCheckAffine16: verifies int8 weights and
// compound biases against the float originals at the given scale factors;
// returns true on success (tolerance criteria live in the implementation —
// confirm there).
96 bool IntegrityCheckAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
97                            float weight_scale_factor, float output_scale_factor, uint32_t num_rows, uint32_t num_columns,
98                            uint32_t num_rows_padded, uint32_t num_columns_padded);