onert-micro/luci-interpreter/src/kernels/Utils.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
  19 #define LUCI_INTERPRETER_KERNELS_UTILS_H
  20
  21 #include "luci_interpreter/core/Tensor.h"
  22 #include "Builders.h"
  23 #include "Params.h"
  24 #include <cassert>
  25 #include <cstdint>
  26
  27 #include <cmath>
  28
  29 namespace luci_interpreter
  30 {
  31 namespace kernels
  32 {
  33
  34 using Activation = luci_interpreter::FusedActFunc;
  35
  36 #define LUCI_INTERPRETER_CHECK(cond)                 \
  37   if (!(cond))                                       \
  38   {                                                  \
  39     assert(false && "LUCI_INTERPRETER_CHECK fails"); \
  40   }
  41
  42 inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
  43                               int32_t filter_size, int32_t out_size)
  44 {
  45   const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  46   const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  47   return padding > 0 ? padding : 0;
  48 }
  49
  50 inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
  51                                         int32_t filter_size, int32_t out_size, int32_t *offset)
  52 {
  53   int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  54   int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
  55   total_padding = total_padding > 0 ? total_padding : 0;
  56   *offset = total_padding % 2;
  57   return total_padding / 2;
  58 }
  59
  60 inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
  61                                  int32_t stride, int32_t dilation_rate = 1)
  62 {
  63   const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  64   switch (padding)
  65   {
  66     case Padding::SAME:
  67       assert(stride != 0);
  68       return (image_size + stride - 1) / stride;
  69     case Padding::VALID:
  70       assert(stride != 0);
  71       return (image_size + stride - effective_filter_size) / stride;
  72     default:
  73       assert(false);
  74       return 0;
  75   }
  76 }
  77
  78 inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
  79                           int32_t d3)
  80 {
  81
  82   return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
  83            Tensor::dim(tensor, 3) +
  84          d3;
  85 }
  86
  // Declaration only; no definition is visible in this header.
  // NOTE(review): presumably writes the [min, max] clamp bounds implied by the
  // fused activation `activation` into *activation_min and *activation_max -
  // confirm against the definition.
  87 template <typename T>
  88 void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
  89
  // Declaration only; no definition is visible in this header.
  // NOTE(review): presumably returns the shape obtained by broadcasting the
  // shapes of `input1` and `input2` against each other - confirm the exact
  // broadcasting rules against the definition.
  90 luci_interpreter::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
  91                                                           const circle::Tensor *input2);
  92
  93 // Helper wrapper to hide broadcast logic
  94 template <typename T> class BroadcastableWrapper
  95 {
  96 public:
  97   BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
  98
  99   T operator[](int idx) { return _v[idx * _stride]; }
 100
 101 private:
 102   const std::vector<T> &_v;
 103   int _stride;
 104 };
 105
 106 inline luci_interpreter::RuntimeShape getTensorShape(const circle::Tensor *tensor)
 107 {
 108   if (tensor == nullptr)
 109     return luci_interpreter::RuntimeShape();
 110
 111   auto const tensor_shape = Tensor::tensor_shape(tensor);
 112
 113   luci_interpreter::RuntimeShape runtime_shape(tensor_shape.size());
 114   for (int i = 0; i < tensor_shape.size(); ++i)
 115   {
 116     runtime_shape.setDim(i, tensor_shape[i]);
 117   }
 118   return runtime_shape;
 119 }
 120
 121 inline void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph,
 122                           int32_t *dims)
 123 {
 124   if (tensor == nullptr)
 125   {
 126     dims = nullptr;
 127     return;
 128   }
 129
 130 #ifndef DIS_DYN_SHAPES
 131   auto *dynamic_shape_vector = runtime_graph->getDynamicShapeTensor(tensor);
 132   if (dynamic_shape_vector != nullptr)
 133   {
 134     for (int n = 0; n < dynamic_shape_vector->dimensionsCount(); ++n)
 135     {
 136       dims[n] = dynamic_shape_vector->dims(n);
 137     }
 138   }
 139   else
 140   {
 141     auto const tensor_shape = Tensor::tensor_shape(tensor);
 142     assert(tensor_shape.size() <= kMaxSmallSize);
 143     for (int i = 0; i < tensor_shape.size(); ++i)
 144     {
 145       dims[i] = tensor_shape[i];
 146     }
 147   }
 148 #else
 149   auto const tensor_shape = Tensor::tensor_shape(tensor);
 150   assert(tensor_shape.size() <= kMaxSmallSize);
 151   for (int i = 0; i < tensor_shape.size(); ++i)
 152   {
 153     dims[i] = tensor_shape[i];
 154   }
 155 #endif // DIS_DYN_SHAPES
 156 }
 157
 158 template <typename T> const T *getTensorData(const uint8_t *tensor_data)
 159 {
 160   return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
 161 }
 162
 163 template <typename T> inline T *getTensorData(uint8_t *tensor_data)
 164 {
 165   return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
 166 }
 167
 // Declaration only; no definition is visible in this header.
 // NOTE(review): presumably returns the shape of `circle_tensor`, consulting
 // `runtime_graph` for a dynamic shape when one exists (compare
 // getTensorDims) - confirm against the definition.
 168 luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor,
 169                                                      BaseRuntimeGraph *runtime_graph);
 170
 171 // A list of tensors in a format that can be used by kernels like split and
 172 // concatenation.
 173 template <typename T, bool is_const> class VectorOfTensors
 174 {
 175 public:
 176   using ElementT = typename std::conditional<is_const, const T, T>::type;
 177   using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
 178
 179   // Build with the tensors in 'tensor_list'.
 180   explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
 181   {
 182     const int num_tensors = tensor_list.size();
 183
 184     all_data_.reserve(num_tensors);
 185     all_shape_.reserve(num_tensors);
 186     all_shape_ptr_.reserve(num_tensors);
 187
 188     for (TensorT *tensor : tensor_list)
 189     {
 190       all_data_.push_back(getTensorData<T>(tensor));
 191       all_shape_.push_back(getTensorShape(tensor));
 192     }
 193
 194     // Taking the pointer from inside a std::vector is only OK if the vector is
 195     // never modified, so we populate all_shape in the previous loop and then we
 196     // are free to grab iterators here.
 197     for (luci_interpreter::RuntimeShape &shape : all_shape_)
 198     {
 199       all_shape_ptr_.push_back(&shape);
 200     }
 201   }
 202   // Return a pointer to the data pointers of all tensors in the list. For
 203   // example:
 204   //   float* const* f = v.data();
 205   //   f[0][1] is the second element of the first tensor.
 206   ElementT *const *data() const { return all_data_.data(); }
 207
 208   // Return a pointer the shape pointers of all tensors in the list. For
 209   // example:
 210   //   const RuntimeShape* const* d = v.dims();
 211   //   dims[1] are the dimensions of the second tensor in the list.
 212   const luci_interpreter::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
 213
 214 private:
 215   std::vector<ElementT *> all_data_;
 216   std::vector<luci_interpreter::RuntimeShape> all_shape_;
 217   std::vector<luci_interpreter::RuntimeShape *> all_shape_ptr_;
 218 };
 219
 220 template <typename T> constexpr bool one_of_types() { return false; }
 221
 222 // Checks if T is equal to one of {U,Other} types
 223 template <typename T, typename U, typename... Other> constexpr bool one_of_types()
 224 {
 225   return std::is_same<T, U>::value || one_of_types<T, Other...>();
 226 }
 227
 // Declaration only; no definition is visible in this header.
 // NOTE(review): presumably, for each of the `n_row` rows of the int8 `matrix`
 // (with `n_col` entries per row), accumulates `scalar` times the row sum into
 // the matching element of `output` - TODO confirm against the definition.
 228 void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
 229                                     int32_t n_col, int32_t *output);
 230
 231 #ifndef DIS_QUANT
 // Declaration only (available when DIS_QUANT is not defined).
 // NOTE(review): presumably stores the rounded base-2 logarithm of `x` in
 // *log2_result and returns whether `x` is (close to) an exact power of two -
 // confirm against the definition.
 232 bool checkedLog2(const float x, int *log2_result);
 233
 // Declaration only (available when DIS_QUANT is not defined).
 // NOTE(review): presumably returns the largest input magnitude representable
 // for the given fixed-point configuration - the exact formula must be
 // confirmed against the definition.
 234 int calculateInputRadius(int input_integer_bits, int input_left_shift, int total_signed_bits);
 235
 // Declaration only (available when DIS_QUANT is not defined).
 // Results are returned through *activation_min and *activation_max.
 // NOTE(review): presumably derives the quantized clamp range for
 // `activation` from the quantization parameters of `output` - confirm
 // against the definition.
 236 void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
 237                                        int32_t *activation_min, int32_t *activation_max);
 238
 // Overload taking the output quantization parameters explicitly instead of
 // a tensor. Declaration only (available when DIS_QUANT is not defined).
 // Results are returned through *activation_min and *activation_max.
 // NOTE(review): presumably computes the quantized clamp range for
 // `activation` from `output_zero_point`, `output_scale` and `data_type` -
 // confirm against the definition.
 239 void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
 240                                        float output_scale, DataType data_type,
 241                                        int32_t *activation_min, int32_t *activation_max);
 242
 243 // Decompose a double multiplier into a Q0.31 int32 representation of its
 244 // significand, and shift representation of its exponent.
 245 //
 246 // Handles an arbitrary positive multiplier. The 'shift' output-value is
 247 // basically the 'floating-point exponent' of the multiplier:
 248 // Negative for a right-shift (when the multiplier is <1), positive for a
 249 // left-shift (when the multiplier is >1)
 //
 // Both results are returned through the output pointers: the significand in
 // *quantized_multiplier and the exponent in *shift. Declaration only; no
 // definition is visible in this header.
 250 void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
 251
 252 // Decompose a double multiplier into a Q0.31 int32 representation of its
 253 // significand, and shift representation of NEGATIVE its exponent ---
 254 // this is intended as a RIGHT-shift.
 255 //
 256 // Restricted to the case where the multiplier < 1 (and non-negative).
 //
 // Both results are returned through the output pointers. Declaration only;
 // no definition is visible in this header.
 // NOTE(review): the text above describes a right-shift amount, yet the output
 // parameter is named `left_shift` - confirm the sign convention against the
 // definition before relying on either.
 257 void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
 258                                          int *left_shift);
 259
 260 inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
 261                                                float output_scale)
 262 {
 263   const double input_product_scale = static_cast<double>(input_scale * filter_scale);
 264   LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
 265   return input_product_scale / static_cast<double>(output_scale);
 266 }
 267
 268 // TODO rename getQuantizedConvolutionMultiplers to something more general
 269 // it is used for non conv operators too
 270 inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
 271                                                              const std::vector<float> &filter_scale,
 272                                                              float output_scale)
 273 {
 274   std::vector<double> effective_output_scales;
 275   size_t n = filter_scale.size();
 276   effective_output_scales.reserve(n);
 277   for (size_t i = 0; i < n; ++i)
 278   {
 279     effective_output_scales.push_back(
 280       getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
 281   }
 282   return effective_output_scales;
 283 }
 284
 285 struct ChannelQuantMultipliers
 286 {
 287   int shift;
 288   int32_t multiplier;
 289   ChannelQuantMultipliers() = default;
 290 };
 291
 292 inline std::vector<ChannelQuantMultipliers>
 293 quantizeMultipliers(const std::vector<double> &effective_scale)
 294 {
 295   size_t n = effective_scale.size();
 296   std::vector<ChannelQuantMultipliers> params(n);
 297   for (size_t i = 0; i < n; ++i)
 298   {
 299     quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
 300   }
 301   return params;
 302 }
 303
 304 // A list of quantized tensors in a format that can be used by kernels like
 305 // split and concatenation.
 306 template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
 307 {
 308 public:
 309   using typename VectorOfTensors<uint8_t, is_const>::TensorT;
 310
 311   // Build with the tensors in 'tensor_list'.
 312   explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
 313     : VectorOfTensors<uint8_t, is_const>(tensor_list)
 314   {
 315     for (TensorT *tensor : tensor_list)
 316     {
 317       zero_point_.push_back(tensor->zero_point());
 318       scale_.push_back(tensor->scale());
 319     }
 320   }
 321
 322   const float *scale() const { return scale_.data(); }
 323   const int32_t *zero_point() const { return zero_point_.data(); }
 324
 325 private:
 326   std::vector<int32_t> zero_point_;
 327   std::vector<float> scale_;
 328 };
 329 #endif // DIS_QUANT
 330
 331 } // namespace kernels
 332 } // namespace luci_interpreter
 333
 334 #endif // LUCI_INTERPRETER_KERNELS_UTILS_H