/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "kernels/Utils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>

namespace luci_interpreter
{
namespace kernels
{
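
// Returns the runtime shape of a tensor, preferring a dynamic shape recorded in the
// runtime graph (when dynamic shapes are enabled) over the static shape from the model.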
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
                                                     BaseRuntimeGraph *runtime_graph)
{
  luci_interpreter::RuntimeShape input_shape = getTensorShape(circle_tensor);

#ifndef DIS_DYN_SHAPES
  auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(circle_tensor);
  if (dynamic_shape_vector != nullptr)
  {
    input_shape.resize(dynamic_shape_vector->dimensionsCount());

    for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
    {
      input_shape.setDim(n, dynamic_shape_vector->dims(n));
    }
  }
#endif // DIS_DYN_SHAPES

  return input_shape;
}
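
// Computes the [min, max] clamping range implied by a fused activation for
// floating-point or plain integer data, e.g. RELU6 yields [0, 6] and RELU yields
// [0, numeric_limits<T>::max()].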
template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
  switch (activation)
  {
    case Activation::NONE:
      *activation_min = std::numeric_limits<T>::lowest();
      *activation_max = std::numeric_limits<T>::max();
      break;
    case Activation::RELU:
      *activation_min = 0;
      *activation_max = std::numeric_limits<T>::max();
      break;
    case Activation::RELU_N1_TO_1:
      *activation_min = -1;
      *activation_max = 1;
      break;
    case Activation::RELU6:
      *activation_min = 0;
      *activation_max = 6;
      break;
    default:
      assert(false && "Unsupported activation.");
  }
}
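
// For each row i of the n_row x n_col matrix, accumulates
// output[i] += scalar * (sum of the int8 entries in row i).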
void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t *output)
{
  for (int i = 0; i < n_row; ++i)
  {
    int32_t row_sum = 0;
    for (int j = 0; j < n_col; ++j)
    {
      row_sum += *matrix++;
    }
    output[i] += row_sum * scalar;
  }
}
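
// Explicit instantiations for the element types used by the kernels.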
template void calculateActivationRange(Activation activation, float *activation_min,
                                       float *activation_max);
template void calculateActivationRange(Activation activation, int32_t *activation_min,
                                       int32_t *activation_max);
template void calculateActivationRange(Activation activation, int64_t *activation_min,
                                       int64_t *activation_max);
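
// Writes round(log2(x)) to *log2_result and returns true iff x is (nearly) an exact
// power of two, e.g. checkedLog2(8.0f, &r) sets r = 3 and returns true, while
// checkedLog2(10.0f, &r) returns false.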
bool checkedLog2(const float x, int *log2_result)
{
  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
  const float x_log2_rounded = std::round(x_log2);
  const float x_log2_fracpart = x_log2 - x_log2_rounded;

  *log2_result = static_cast<int>(x_log2_rounded);
  return std::abs(x_log2_fracpart) < 1e-3f;
}
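
// Computes the largest input magnitude whose rescaled value still fits in a fixed-point
// representation with input_integer_bits integer bits out of total_signed_bits, given
// the pre-scaling left shift; inputs beyond this radius saturate.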
int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
{
  const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                    (1LL << (total_signed_bits - input_integer_bits)) /
                                    (1LL << input_left_shift);
  // Tighten bound using floor. Suppose that we could use the exact value.
  // After scaling the difference, the result would be at the maximum. Thus we
  // must ensure that our value has lower magnitude.
  return static_cast<int>(std::floor(max_input_rescaled));
}
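
// Maps a fused activation to a quantized [min, max] range by quantizing the float
// clamp bounds with the output scale/zero point and intersecting with [qmin, qmax].
// For example, with scale = 0.05f and zero_point = 128 (uint8), RELU6 yields
// [max(0, 128), min(255, 128 + round(6 / 0.05))] = [128, 248].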
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                  int32_t zero_point, float scale,
                                                  int32_t *activation_min, int32_t *activation_max)
{
  auto quantize = [scale, zero_point](float x) {
    return zero_point + static_cast<int32_t>(std::round(x / scale));
  };

  switch (activation)
  {
    case Activation::NONE:
    case Activation::TANH:
      *activation_min = qmin;
      *activation_max = qmax;
      break;
    case Activation::RELU:
      *activation_min = std::max(qmin, quantize(0.0f));
      *activation_max = qmax;
      break;
    case Activation::RELU_N1_TO_1:
      *activation_min = std::max(qmin, quantize(-1.0f));
      *activation_max = std::min(qmax, quantize(1.0f));
      break;
    case Activation::RELU6:
      *activation_min = std::max(qmin, quantize(0.0f));
      *activation_max = std::min(qmax, quantize(6.0f));
      break;
    default:
      assert(false && "Unsupported activation.");
  }
}
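
// Convenience overload that reads the scale and zero point from the output tensor's
// quantization parameters.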
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                  const circle::Tensor *output,
                                                  int32_t *activation_min, int32_t *activation_max)
{
  const float scale = Tensor::scale(output);
  const int32_t zero_point = Tensor::zero_point(output);

  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, scale, activation_min,
                                        activation_max);
}
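
// Determines [qmin, qmax] for the output data type, then narrows it for the fused
// activation. S8 and S16 use symmetric ranges (qmin == -qmax), dropping the most
// negative representable value.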
void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
                                       float output_scale, DataType data_type,
                                       int32_t *activation_min, int32_t *activation_max)
{
  int32_t qmin{};
  int32_t qmax{};
  switch (data_type)
  {
    case DataType::U8:
      qmin = 0;
      qmax = std::numeric_limits<uint8_t>::max();
      break;
    case DataType::S8:
      qmin = -std::numeric_limits<int8_t>::max();
      qmax = std::numeric_limits<int8_t>::max();
      break;
    case DataType::S16:
      // For now, assume that signed int16 type implies signed symmetric quantization.
      assert(output_zero_point == 0);
      qmin = -std::numeric_limits<int16_t>::max();
      qmax = std::numeric_limits<int16_t>::max();
      break;
    default:
      assert(false && "Unsupported type.");
  }

  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
                                        activation_min, activation_max);
}
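
// Overload that pulls the zero point, scale, and element type from the output tensor;
// only per-tensor (single zero point) quantization is supported.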
void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
                                       int32_t *activation_min, int32_t *activation_max)
{
  assert(Tensor::zero_points(output).size() == 1);
  const float scale = Tensor::scale(output);
  const int32_t zero_point = Tensor::zero_point(output);
  calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
                                    activation_min, activation_max);
}
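
// Decomposes a real multiplier into a Q31 fixed-point value and a power-of-two shift
// such that double_multiplier ~= quantized_multiplier * 2^(shift - 31). For example,
// 0.75 = 0.75 * 2^0 gives quantized_multiplier = round(0.75 * 2^31) = 1610612736 and
// shift = 0.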
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));

  if (q_fixed == (int64_t(1) << 31))
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  // We could conceivably handle values in the range (roughly) [32, 63]
  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
  // the present handling is just doing 'flush denormals to zero'. We could
  // reconsider and actually generate nonzero denormals if a need arises.
  if (*shift < -31)
  {
    *shift = 0;
    q_fixed = 0;
  }

  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
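
// Same as quantizeMultiplier, but restricted to multipliers in (0, 1), so the
// resulting exponent is guaranteed to be a non-positive (right) shift.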
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift)
{
  assert(double_multiplier < 1.0);
  assert(double_multiplier > 0.0);
  int shift;
  quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  assert(shift <= 0);
  *left_shift = shift;
}
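
// Computes the NumPy-style broadcast shape of two tensors, aligning dimensions from
// the right; each pair of dimensions must match or contain a 1. For example,
// [2, 1, 3] and [4, 3] broadcast to [2, 4, 3].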
luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
                                                          const circle::Tensor *input2)
{
  const int num_input1_dims = Tensor::num_dims(input1);
  const int num_input2_dims = Tensor::num_dims(input2);
  const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
  luci_interpreter::RuntimeShape output_shape(num_out_dims);

  for (int i = 0; i < num_out_dims; ++i)
  {
    const int32_t input1_dim =
      i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
    const int32_t input2_dim =
      i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;

    bool need_broadcast = input1_dim != input2_dim;
    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);

    output_shape.setDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
  }

  return output_shape;
}

} // namespace kernels
} // namespace luci_interpreter