/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/Utils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

namespace luci_interpreter
{
namespace kernels
{
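
// Computes the value range [*activation_min, *activation_max] that a fused activation
// function allows, for an arithmetic element type T.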
template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
  switch (activation)
  {
    case Activation::NONE:
      *activation_min = std::numeric_limits<T>::lowest();
      *activation_max = std::numeric_limits<T>::max();
      break;
    case Activation::RELU:
      *activation_min = 0;
      *activation_max = std::numeric_limits<T>::max();
      break;
    case Activation::RELU_N1_TO_1:
      *activation_min = -1;
      *activation_max = 1;
      break;
    case Activation::RELU6:
      *activation_min = 0;
      *activation_max = 6;
      break;
    default:
      assert(false && "Unsupported activation.");
  }
}

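// Accumulates each row sum of `matrix`, scaled by `scalar`, into the corresponding
// output element: output[i] += scalar * sum(matrix[i][0..n_col-1]).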
void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t *output)
{
  for (int i = 0; i < n_row; ++i)
  {
    int32_t row_sum = 0;
    for (int j = 0; j < n_col; ++j)
    {
      row_sum += *matrix++;
    }
    output[i] += row_sum * scalar;
  }
}

template void calculateActivationRange(Activation activation, float *activation_min,
                                       float *activation_max);
template void calculateActivationRange(Activation activation, int32_t *activation_min,
                                       int32_t *activation_max);
template void calculateActivationRange(Activation activation, int64_t *activation_min,
                                       int64_t *activation_max);

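// Maps the real-valued activation bounds into the quantized domain using
// q = zero_point + round(x / scale) and clamps them to [qmin, qmax].
// Illustrative example: with scale = 0.05 and zero_point = 0, RELU6 yields
// [max(qmin, 0), min(qmax, 120)], since round(6.0 / 0.05) == 120.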
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                  int32_t zero_point, float scale,
                                                  int32_t *activation_min, int32_t *activation_max)
{
  auto quantize = [scale, zero_point](float x) {
    return zero_point + static_cast<int32_t>(std::round(x / scale));
  };

  switch (activation)
  {
    case Activation::NONE:
    case Activation::TANH:
      *activation_min = qmin;
      *activation_max = qmax;
      break;
    case Activation::RELU:
      *activation_min = std::max(qmin, quantize(0.0f));
      *activation_max = qmax;
      break;
    case Activation::RELU_N1_TO_1:
      *activation_min = std::max(qmin, quantize(-1.0f));
      *activation_max = std::min(qmax, quantize(1.0f));
      break;
    case Activation::RELU6:
      *activation_min = std::max(qmin, quantize(0.0f));
      *activation_max = std::min(qmax, quantize(6.0f));
      break;
    default:
      assert(false && "Unsupported activation.");
  }
}

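// Same as above, but reads scale and zero point from the output tensor's
// quantization parameters.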
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                  const circle::Tensor *output,
                                                  int32_t *activation_min, int32_t *activation_max)
{
  const float scale = Tensor::scale(output);
  const int32_t zero_point = Tensor::zero_point(output);

  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, scale, activation_min,
                                        activation_max);
}

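// Picks [qmin, qmax] from the output data type, then applies the fused activation
// in the quantized domain via the implementation above.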
void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
                                       float output_scale, DataType data_type,
                                       int32_t *activation_min, int32_t *activation_max)
{
  int32_t qmin{};
  int32_t qmax{};
  switch (data_type)
  {
    case DataType::U8:
      qmin = 0;
      qmax = std::numeric_limits<uint8_t>::max();
      break;
    case DataType::S8:
      qmin = -std::numeric_limits<int8_t>::max();
      qmax = std::numeric_limits<int8_t>::max();
      break;
    case DataType::S16:
      // For now, assume that signed int16 type implies signed symmetric quantization.
      assert(output_zero_point == 0);
      qmin = -std::numeric_limits<int16_t>::max();
      qmax = std::numeric_limits<int16_t>::max();
      break;
    default:
      assert(false && "Unsupported type.");
  }

  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
                                        activation_min, activation_max);
}

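// Overload taking the output tensor directly; only per-tensor quantization
// (a single zero point) is supported.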
void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
                                       int32_t *activation_min, int32_t *activation_max)
{
  assert(Tensor::zero_points(output).size() == 1);
  const float scale = Tensor::scale(output);
  const int32_t zero_point = Tensor::zero_point(output);

  calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
                                    activation_min, activation_max);
}

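// Decomposes a real multiplier into a Q31 fixed-point value and a power-of-two exponent
// so that double_multiplier ~= quantized_multiplier * 2^(shift - 31).
// Worked example (illustrative): 0.75 -> shift = 0,
// quantized_multiplier = round(0.75 * 2^31) = 1610612736.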
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));

  if (q_fixed == (int64_t(1) << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  // We could conceivably handle values in the range (roughly) [32, 63]
  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
  // the present handling is just doing 'flush denormals to zero'. We could
  // reconsider and actually generate nonzero denormals if a need arises.
  if (*shift < -31)
  {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift)
{
  assert(double_multiplier < 1.0);
  assert(double_multiplier > 0.0);
  int shift;
  quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  assert(shift <= 0);
  *left_shift = shift;
}

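// Computes the NumPy-style broadcast shape of two tensors, aligning dimensions from the
// trailing end. Illustrative example: shapes [2, 1, 3] and [4, 3] broadcast to [2, 4, 3].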
tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
                                                const circle::Tensor *input2)
{
  const int num_input1_dims = Tensor::num_dims(input1);
  const int num_input2_dims = Tensor::num_dims(input2);
  const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
  tflite::RuntimeShape output_shape(num_out_dims);

  for (int i = 0; i < num_out_dims; ++i)
  {
    const int32_t input1_dim =
      i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
    const int32_t input2_dim =
      i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;

    bool need_broadcast = input1_dim != input2_dim;
    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);

    output_shape.SetDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
  }

  return output_shape;
}

} // namespace kernels
} // namespace luci_interpreter