Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / src / gna_plugin / quantization / quantization.h
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #pragma once
6
7 #include <math.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <vector>
11 #include <cstdint>
12
// Quantization limit constants.
// NOTE(review): the code that consumes these macros is not visible in this
// header; the usage notes below are inferred from the names -- confirm them
// against the implementation.
// Presumably an upper bound on an output/bias multiplier -- TODO confirm.
13 #define MAX_OUT_MULTIPLIER 230
// 127 is the largest value of a signed 8-bit integer; presumably the target
// maximum for 1-byte weights -- TODO confirm.
14 #define MAX_VAL_1B_WEIGHT 127
// 16384 == 2^14, which is below the int16_t maximum (32767); presumably the
// target maximum for 2-byte weights -- TODO confirm.
15 #define MAX_VAL_2B_WEIGHT 16384
// Same value as MAX_VAL_2B_WEIGHT; presumably the target maximum for 2-byte
// features (inputs) -- TODO confirm.
16 #define MAX_VAL_2B_FEAT 16384
// Diagnostic macro taking printf-style arguments.
// When DEBUG is defined it forwards its arguments to fprintf(stderr, ...).
// Otherwise it expands to nothing, so its arguments are not evaluated in
// non-DEBUG builds (do not rely on side effects inside the argument list).
17 #ifdef DEBUG
18 #define QUANTWARNING(...) (fprintf(stderr, __VA_ARGS__))
19 #else
20 #define QUANTWARNING(...)
21 #endif
22
// Prototype only; the definition is not visible in this header.
// Takes float weight/bias buffers together with int16_t weight and int32_t
// bias buffers, an input scale factor by value, and weight/output scale
// factors by pointer.
// NOTE(review): presumably the int buffers and both pointed-to scale factors
// are outputs (weight scale chosen by the routine) -- confirm in the .cpp.
// NOTE(review): num_rows/num_columns vs. the *_padded counts presumably
// describe the logical and the padded matrix sizes -- confirm layout.
23 void QuantizeAffine16(float *ptr_float_weights,
24                       float *ptr_float_biases,
25                       int16_t *ptr_int_weights,
26                       int32_t *ptr_int_biases,
27                       float input_scale_factor,
28                       float *ptr_weight_scale_factor,
29                       float *ptr_output_scale_factor,
30                       uint32_t num_rows,
31                       uint32_t num_columns,
32                       uint32_t num_rows_padded,
33                       uint32_t num_columns_padded);
// Prototype only; the definition is not visible in this header.
// Same parameter list as QuantizeAffine16 except that the weight scale
// factor is passed by value (weight_scale_factor) instead of by pointer,
// so this routine cannot report a weight scale factor back to the caller.
// NOTE(review): presumably quantizes with a caller-fixed weight scale and
// writes the output scale through ptr_output_scale_factor -- confirm.
34 void FixedQuantizeAffine16(float *ptr_float_weights,
35                            float *ptr_float_biases,
36                            int16_t *ptr_int_weights,
37                            int32_t *ptr_int_biases,
38                            float input_scale_factor,
39                            float weight_scale_factor,
40                            float *ptr_output_scale_factor,
41                            uint32_t num_rows,
42                            uint32_t num_columns,
43                            uint32_t num_rows_padded,
44                            uint32_t num_columns_padded);
// Three overloads returning a float scale factor; definitions are not
// visible in this header.
// NOTE(review): presumably each returns a factor that maps the largest
// absolute input value onto target_max -- confirm in the .cpp.
//
// Overload 1: untyped buffer plus an element count.
// NOTE(review): ptr_float_memory is void*; presumably it is read as an array
// of num_elements floats -- confirm.
45 float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements);
// Overload 2: a vector of float vectors, taken by non-const reference.
46 float ScaleFactorForQuantization(std::vector<std::vector<float>> &input_vectors, float target_max);
// Overload 3: as overload 2, with an index and a group size.
// NOTE(review): presumably restricts the scan to num_group_size vectors
// starting at index -- confirm. Note index/num_group_size are signed int
// here, while QuantizeVector16 uses uint32_t for the same-named parameters.
47 float ScaleFactorForQuantization(std::vector<std::vector<float>> &input_vectors,
48                                  int index,
49                                  int num_group_size,
50                                  float target_max);
// Two overloads; definitions are not visible in this header.
//
// Overload 1: float buffer, int16_t buffer, element count and scale factor.
// NOTE(review): presumably writes num_elements scaled values from
// ptr_float_memory into ptr_int_memory -- confirm rounding/saturation rules.
51 void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor);
// Overload 2: source is a vector of float vectors selected by index and
// num_group_size.
// NOTE(review): the layout written to ptr_int_memory (e.g. interleaving of
// the group) cannot be determined from the prototype -- confirm.
52 void QuantizeVector16(std::vector<std::vector<float>> &input_vectors,
53                       int16_t *ptr_int_memory,
54                       uint32_t index,
55                       uint32_t num_group_size,
56                       float scale_factor);
// Prototype only; the definition is not visible in this header.
// Takes a single non-const int16_t buffer, an element count, and both a
// previous and a new scale factor.
// NOTE(review): presumably rescales the buffer in place from
// prev_scale_factor to scale_factor -- confirm.
57 void ReQuantizeVector16(int16_t *ptr_int_memory, uint32_t num_elements, float prev_scale_factor, float scale_factor);
// Prototype only; the definition is not visible in this header.
// Takes the float and quantized (int16_t weights, int32_t biases) buffers
// together with the weight and output scale factors (both by value) and the
// logical/padded dimensions; returns bool.
// NOTE(review): presumably compares the quantized data against the float
// data; the meaning of true vs. false is not visible here -- confirm.
58 bool IntegrityCheckAffine16(float *ptr_float_weights,
59                             float *ptr_float_biases,
60                             int16_t *ptr_int_weights,
61                             int32_t *ptr_int_biases,
62                             float weight_scale_factor,
63                             float output_scale_factor,
64                             uint32_t num_rows,
65                             uint32_t num_columns,
66                             uint32_t num_rows_padded,
67                             uint32_t num_columns_padded);
// Prototype only; the definition is not visible in this header.
// Weights-only counterpart of IntegrityCheckAffine16: the signature has no
// bias buffers and no output scale factor.
// NOTE(review): the meaning of the bool result is not visible here --
// confirm in the .cpp.
68 bool IntegrityCheckAffineWeights16(float *ptr_float_weights,
69                                    int16_t *ptr_int_weights,
70                                    float weight_scale_factor,
71                                    uint32_t num_rows,
72                                    uint32_t num_columns,
73                                    uint32_t num_rows_padded,
74                                    uint32_t num_columns_padded);
// Prototype only; the definition is not visible in this header.
// Takes a float bias buffer, an int32_t bias buffer, input and weight scale
// factors by value, an output scale factor by pointer, and a row count.
// NOTE(review): presumably writes num_rows quantized biases and stores the
// resulting output scale through ptr_output_scale_factor -- confirm.
75 void QuantizeBias16(float *ptr_float_biases,
76                     int32_t *ptr_int_biases,
77                     float input_scale_factor,
78                     float weight_scale_factor,
79                     float *ptr_output_scale_factor,
80                     uint32_t num_rows);
// Prototype only; the definition is not visible in this header.
// Takes an int16_t buffer, a float vector by non-const reference and a
// scale factor; there is no explicit element-count parameter.
// NOTE(review): presumably the element count comes from float_vector's
// size and the vector receives the de-quantized values -- confirm.
81 void DeQuantizeVector16(int16_t *ptr_int_memory, std::vector<float> &float_vector, float scale_factor);
// Two overloads; definitions are not visible in this header.
//
// Overload 1: int32_t counterpart of DeQuantizeVector16 (same parameter
// shape, no explicit element count).
// NOTE(review): presumably sized by float_vector -- confirm.
82 void DeQuantizeVector32(int32_t *ptr_int_memory, std::vector<float> &float_vector, float scale_factor);
// Overload 2: adds an index and a group size.
// NOTE(review): presumably selects one member of a group of num_group_size
// vectors stored in ptr_int_memory; the memory layout is not visible from
// the prototype -- confirm.
83 void DeQuantizeVector32(int32_t *ptr_int_memory,
84                         std::vector<float> &float_vector,
85                         uint32_t index,
86                         uint32_t num_group_size,
87                         float scale_factor);
88
89 #include "gna-api.h"
90
// Prototype only; the definition is not visible in this header.
// 8-bit-weight counterpart of QuantizeAffine16: the weight buffer is int8_t
// and the bias buffer is intel_compound_bias_t instead of int32_t; the
// remaining parameters match QuantizeAffine16.
// NOTE(review): intel_compound_bias_t is not declared in this header;
// presumably it comes from "gna-api.h" included just above -- confirm.
91 void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
92                      float input_scale_factor, float *ptr_weight_scale_factor, float *ptr_output_scale_factor,
93                      uint32_t num_rows, uint32_t num_columns, uint32_t num_rows_padded, uint32_t num_columns_padded);
// Prototype only; the definition is not visible in this header.
// Same parameter list as QuantizeBias16 except that the quantized biases
// are intel_compound_bias_t rather than int32_t.
// NOTE(review): which fields of intel_compound_bias_t are filled in is not
// visible here -- confirm against the definition and gna-api.h.
94 void QuantizeBias8(float *ptr_float_biases, intel_compound_bias_t  *ptr_int_biases, float input_scale_factor,
95                    float weight_scale_factor, float *ptr_output_scale_factor, uint32_t num_rows);
// Prototype only; the definition is not visible in this header.
// 8-bit-weight counterpart of IntegrityCheckAffine16: int8_t weights and
// intel_compound_bias_t biases; the remaining parameters match.
// NOTE(review): the meaning of the bool result is not visible here --
// confirm in the .cpp.
96 bool IntegrityCheckAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
97                            float weight_scale_factor, float output_scale_factor, uint32_t num_rows, uint32_t num_columns,
98                            uint32_t num_rows_padded, uint32_t num_columns_padded);
99
100