onert-micro/luci-interpreter/src/kernels/Utils.cpp

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #include "kernels/Utils.h"
  19
  20 #include <cassert>
  21 #include <cmath>
  22 #include <limits>
  23
  24 namespace luci_interpreter
  25 {
  26 namespace kernels
  27 {
  28
  29 luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
  30                                                      BaseRuntimeGraph *runtime_graph)
  31 {
  32   luci_interpreter::RuntimeShape input_shape = getTensorShape(circle_tensor);
  33
  34 #ifndef DIS_DYN_SHAPES
  35   auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(circle_tensor);
  36   if (dynamic_shape_vector != nullptr)
  37   {
  38     input_shape.resize(dynamic_shape_vector->dimensionsCount());
  39
  40     for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
  41     {
  42       input_shape.setDim(n, dynamic_shape_vector->dims(n));
  43     }
  44   }
  45 #endif // DIS_DYN_SHAPES
  46   return input_shape;
  47 }
  48
  49 template <typename T>
  50 void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
  51 {
  52   switch (activation)
  53   {
  54     case Activation::NONE:
  55       *activation_min = std::numeric_limits<T>::lowest();
  56       *activation_max = std::numeric_limits<T>::max();
  57       break;
  58     case Activation::RELU:
  59       *activation_min = 0;
  60       *activation_max = std::numeric_limits<T>::max();
  61       break;
  62     case Activation::RELU_N1_TO_1:
  63       *activation_min = -1;
  64       *activation_max = 1;
  65       break;
  66     case Activation::RELU6:
  67       *activation_min = 0;
  68       *activation_max = 6;
  69       break;
  70     default:
  71       assert(false && "Unsupported activation.");
  72   }
  73 }
  74
  75 void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
  76                                     int32_t n_col, int32_t *output)
  77 {
  78   for (int i = 0; i < n_row; ++i)
  79   {
  80     int32_t row_sum = 0;
  81     for (int j = 0; j < n_col; ++j)
  82     {
  83       row_sum += *matrix++;
  84     }
  85     output[i] += row_sum * scalar;
  86   }
  87 }
  88
  89 template void calculateActivationRange(Activation activation, float *activation_min,
  90                                        float *activation_max);
  91 template void calculateActivationRange(Activation activation, int32_t *activation_min,
  92                                        int32_t *activation_max);
  93 template void calculateActivationRange(Activation activation, int64_t *activation_min,
  94                                        int64_t *activation_max);
  95
  96 #ifndef DIS_QUANT
  97 bool checkedLog2(const float x, int *log2_result)
  98 {
  99   const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
 100   const float x_log2_rounded = std::round(x_log2);
 101   const float x_log2_fracpart = x_log2 - x_log2_rounded;
 102
 103   *log2_result = static_cast<int>(x_log2_rounded);
 104   return std::abs(x_log2_fracpart) < 1e-3f;
 105 }
 106
 107 int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits)
 108 {
 109   const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
 110                                     (1LL << (total_signed_bits - input_integer_bits)) /
 111                                     (1LL << input_left_shift);
 112   // Tighten bound using floor.  Suppose that we could use the exact value.
 113   // After scaling the difference, the result would be at the maximum.  Thus we
 114   // must ensure that our value has lower magnitude.
 115   return static_cast<int>(std::floor(max_input_rescaled));
 116 }
 117
 118 static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
 119                                                   int32_t zero_point, float scale,
 120                                                   int32_t *activation_min, int32_t *activation_max)
 121 {
 122   auto quantize = [scale, zero_point](float x) {
 123     return zero_point + static_cast<int32_t>(std::round(x / scale));
 124   };
 125
 126   switch (activation)
 127   {
 128     case Activation::NONE:
 129     case Activation::TANH:
 130       *activation_min = qmin;
 131       *activation_max = qmax;
 132       break;
 133     case Activation::RELU:
 134       *activation_min = std::max(qmin, quantize(0.0f));
 135       *activation_max = qmax;
 136       break;
 137     case Activation::RELU_N1_TO_1:
 138       *activation_min = std::max(qmin, quantize(-1.0f));
 139       *activation_max = std::min(qmax, quantize(1.0f));
 140       break;
 141     case Activation::RELU6:
 142       *activation_min = std::max(qmin, quantize(0.0f));
 143       *activation_max = std::min(qmax, quantize(6.0f));
 144       break;
 145     default:
 146       assert(false && "Unsupported activation.");
 147   }
 148 }
 149
 150 static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
 151                                                   const circle::Tensor *output,
 152                                                   int32_t *activation_min, int32_t *activation_max)
 153 {
 154   const float scale = Tensor::scale(output);
 155   const int32_t zero_point = Tensor::zero_point(output);
 156
 157   calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, zero_point,
 158                                         activation_min, activation_max);
 159 }
 160
 161 void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
 162                                        float output_scale, DataType data_type,
 163                                        int32_t *activation_min, int32_t *activation_max)
 164 {
 165   int32_t qmin{};
 166   int32_t qmax{};
 167   switch (data_type)
 168   {
 169     case DataType::U8:
 170       qmin = 0;
 171       qmax = std::numeric_limits<uint8_t>::max();
 172       break;
 173     case DataType::S8:
 174       qmin = -std::numeric_limits<int8_t>::max();
 175       qmax = std::numeric_limits<int8_t>::max();
 176       break;
 177     case DataType::S16:
 178       // For now, assume that signed int16 type implies signed symmetric quantization.
 179       assert(output_zero_point == 0);
 180       qmin = -std::numeric_limits<int16_t>::max();
 181       qmax = std::numeric_limits<int16_t>::max();
 182       break;
 183     default:
 184       assert(false && "Unsupported type.");
 185   }
 186
 187   calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output_zero_point, output_scale,
 188                                         activation_min, activation_max);
 189 }
 190
 191 void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
 192                                        int32_t *activation_min, int32_t *activation_max)
 193 {
 194   assert(Tensor::zero_points(output).size() == 1);
 195   const float scale = Tensor::scale(output);
 196   const int32_t zero_point = Tensor::zero_point(output);
 197   calculateActivationRangeQuantized(activation, zero_point, scale, Tensor::element_type(output),
 198                                     activation_min, activation_max);
 199 }
 200
 201 void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
 202 {
 203   if (double_multiplier == 0.0)
 204   {
 205     *quantized_multiplier = 0;
 206     *shift = 0;
 207     return;
 208   }
 209
 210   const double q = std::frexp(double_multiplier, shift);
 211   auto q_fixed = static_cast<int64_t>(std::round(q * (int64_t(1) << 31)));
 212
 213   if (q_fixed == (int64_t(1) << 31))
 214   {
 215     q_fixed /= 2;
 216     ++*shift;
 217   }
 218   assert(q_fixed <= std::numeric_limits<int32_t>::max());
 219   // A shift amount smaller than -31 would cause all bits to be shifted out
 220   // and thus all results would be zero. We implement that instead with
 221   // q_fixed==0, so as to avoid hitting issues with right-shift
 222   // operations with shift amounts greater than 31. Note that this happens
 223   // roughly when abs(double_multiplier) < 2^-31 and the present handling means
 224   // that we're effectively flushing tiny double_multiplier's to zero.
 225   // We could conceivably handle values in the range (roughly) [32, 63]
 226   // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
 227   // the present handling is just doing 'flush denormals to zero'. We could
 228   // reconsider and actually generate nonzero denormals if a need arises.
 229   if (*shift < -31)
 230   {
 231     *shift = 0;
 232     q_fixed = 0;
 233   }
 234   *quantized_multiplier = static_cast<int32_t>(q_fixed);
 235 }
 236
 237 void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
 238                                          int *left_shift)
 239 {
 240   assert(double_multiplier < 1.0);
 241   assert(double_multiplier > 0.0);
 242   int shift;
 243   quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
 244   assert(shift <= 0);
 245   *left_shift = shift;
 246 }
 247 #endif
 248
 249 luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
 250                                                           const circle::Tensor *input2)
 251 {
 252   const int num_input1_dims = Tensor::num_dims(input1);
 253   const int num_input2_dims = Tensor::num_dims(input2);
 254   const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
 255   luci_interpreter::RuntimeShape output_shape(num_out_dims);
 256
 257   for (int i = 0; i < num_out_dims; ++i)
 258   {
 259     const int32_t input1_dim =
 260       i < num_input1_dims ? Tensor::dim(input1, num_input1_dims - i - 1) : 1;
 261     const int32_t input2_dim =
 262       i < num_input2_dims ? Tensor::dim(input2, num_input2_dims - i - 1) : 1;
 263
 264     bool need_broadcast = input1_dim != input2_dim;
 265     bool can_broadcast = input1_dim == 1 || input2_dim == 1;
 266     LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
 267
 268     output_shape.setDim(num_out_dims - i - 1, std::max(input1_dim, input2_dim));
 269   }
 270
 271   return output_shape;
 272 }
 273
 274 } // namespace kernels
 275 } // namespace luci_interpreter