onert-micro/luci-interpreter/src/kernels/Utils.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
  19 #define LUCI_INTERPRETER_KERNELS_UTILS_H
  20
  21 #include "luci_interpreter/core/Tensor.h"
  22
  23 #include <tensorflow/lite/kernels/internal/types.h>
  24 #include <cassert>
  25 #include <cstdint>
  26
  27 namespace luci_interpreter
  28 {
  29 namespace kernels
  30 {
  31
// Kernel-local alias for the interpreter's fused activation function type, so
// kernel helpers in this namespace can refer to it simply as `Activation`.
using Activation = luci_interpreter::FusedActFunc;
  33
  34 #define LUCI_INTERPRETER_CHECK(cond)                 \
  35   if (!(cond))                                       \
  36   {                                                  \
  37     assert(false && "LUCI_INTERPRETER_CHECK fails"); \
  38   }
  39
  40 inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
  41                               int32_t filter_size, int32_t out_size)
  42 {
  43   const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  44   const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  45   return padding > 0 ? padding : 0;
  46 }
  47
  48 inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
  49                                         int32_t filter_size, int32_t out_size, int32_t *offset)
  50 {
  51   int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  52   int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
  53   total_padding = total_padding > 0 ? total_padding : 0;
  54   *offset = total_padding % 2;
  55   return total_padding / 2;
  56 }
  57
  58 inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
  59                                  int32_t stride, int32_t dilation_rate = 1)
  60 {
  61   const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  62   switch (padding)
  63   {
  64     case Padding::SAME:
  65       return (image_size + stride - 1) / stride;
  66     case Padding::VALID:
  67       return (image_size + stride - effective_filter_size) / stride;
  68     default:
  69       assert(false);
  70       return 0;
  71   }
  72 }
  73
  74 inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
  75                           int32_t d3)
  76 {
  77
  78   return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
  79            Tensor::dim(tensor, 3) +
  80          d3;
  81 }
  82
// Declaration only; the definition is not part of this header.
// Presumably stores the lower / upper bound implied by `activation` through
// `activation_min` / `activation_max` for value type T -- confirm against the
// definition.
template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
  85
// Declaration only; the definition is not part of this header.
// Presumably returns the shape obtained by broadcasting the shapes of `input1`
// and `input2` against each other -- confirm against the definition.
tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
                                                const circle::Tensor *input2);
  88
  89 // Helper wrapper to hide broadcast logic
  90 template <typename T> class BroadcastableWrapper
  91 {
  92 public:
  93   BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
  94
  95   T operator[](int idx) { return _v[idx * _stride]; }
  96
  97 private:
  98   const std::vector<T> &_v;
  99   int _stride;
 100 };
 101
 102 inline tflite::RuntimeShape getTensorShape(const circle::Tensor *tensor)
 103 {
 104   if (tensor == nullptr)
 105     return tflite::RuntimeShape();
 106
 107   tflite::RuntimeShape runtime_shape(Tensor::num_dims(tensor));
 108   for (int i = 0; i < Tensor::num_dims(tensor); ++i)
 109   {
 110     runtime_shape.SetDim(i, Tensor::dim(tensor, i));
 111   }
 112   return runtime_shape;
 113 }
 114
 115 template <typename T> const T *getTensorData(const uint8_t *tensor_data)
 116 {
 117   return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
 118 }
 119
 120 template <typename T> T *getTensorData(uint8_t *tensor_data)
 121 {
 122   return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
 123 }
 124
 125 // A list of tensors in a format that can be used by kernels like split and
 126 // concatenation.
 127 template <typename T, bool is_const> class VectorOfTensors
 128 {
 129 public:
 130   using ElementT = typename std::conditional<is_const, const T, T>::type;
 131   using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
 132
 133   // Build with the tensors in 'tensor_list'.
 134   explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
 135   {
 136     const int num_tensors = tensor_list.size();
 137
 138     all_data_.reserve(num_tensors);
 139     all_shape_.reserve(num_tensors);
 140     all_shape_ptr_.reserve(num_tensors);
 141
 142     for (TensorT *tensor : tensor_list)
 143     {
 144       all_data_.push_back(getTensorData<T>(tensor));
 145       all_shape_.push_back(getTensorShape(tensor));
 146     }
 147
 148     // Taking the pointer from inside a std::vector is only OK if the vector is
 149     // never modified, so we populate all_shape in the previous loop and then we
 150     // are free to grab iterators here.
 151     for (tflite::RuntimeShape &shape : all_shape_)
 152     {
 153       all_shape_ptr_.push_back(&shape);
 154     }
 155   }
 156   // Return a pointer to the data pointers of all tensors in the list. For
 157   // example:
 158   //   float* const* f = v.data();
 159   //   f[0][1] is the second element of the first tensor.
 160   ElementT *const *data() const { return all_data_.data(); }
 161
 162   // Return a pointer the shape pointers of all tensors in the list. For
 163   // example:
 164   //   const RuntimeShape* const* d = v.dims();
 165   //   dims[1] are the dimensions of the second tensor in the list.
 166   const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
 167
 168 private:
 169   std::vector<ElementT *> all_data_;
 170   std::vector<tflite::RuntimeShape> all_shape_;
 171   std::vector<tflite::RuntimeShape *> all_shape_ptr_;
 172 };
 173
 174 #ifndef DIS_QUANT
// Declarations only; the definitions are not part of this header.
// Presumably both overloads store the quantized lower / upper bound implied by
// `activation` through `activation_min` / `activation_max` -- confirm against
// the definitions.

// Overload taking the output tensor itself.
void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
                                       int32_t *activation_min, int32_t *activation_max);
// Overload taking the output zero point, scale and data type explicitly.
void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
                                       float output_scale, DataType data_type,
                                       int32_t *activation_min, int32_t *activation_max);
 180
 181 template <typename T> constexpr bool one_of_types() { return false; }
 182
 183 // Checks if T is equal to one of {U,Other} types
 184 template <typename T, typename U, typename... Other> constexpr bool one_of_types()
 185 {
 186   return std::is_same<T, U>::value || one_of_types<T, Other...>();
 187 }
 188
// Declaration only; the definition is not part of this header.
// Presumably reads an n_row x n_col int8 `matrix`, scales by `scalar` and
// accumulates into `output` -- confirm the exact semantics and the expected
// size of `output` against the definition.
void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t *output);
 191
 192 /**
 193  * Fills activation min and max parameters depending on given data type and activation
 194  *
 195  * T is a template parameter, so after optimization this code left with only required if case
 196  *
 197  * @tparam T data type of arithmetic operation output tensor
 198  * @param params tflite params to fill
 199  * @param activation luci_interpreter::Activation of arithmetic operation
 200  */
 201 template <typename T>
 202 void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
 203 {
 204   static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
 205
 206   if (std::is_same<T, float>::value)
 207     calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
 208   if (std::is_same<T, int32_t>::value)
 209     calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
 210   else
 211     calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
 212 }
 213
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Handles an arbitrary positive multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
//
// Declaration only; the definition is not part of this header. The results
// are returned through the `quantized_multiplier` and `shift` pointers.
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
 222
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of NEGATIVE its exponent ---
// this is intended as a RIGHT-shift.
//
// Restricted to the case where the multiplier < 1 (and non-negative).
//
// Declaration only; the definition is not part of this header.
// NOTE(review): the output parameter is named `left_shift` while the text
// above describes a right-shift -- confirm the sign convention of the stored
// value against the definition before relying on it.
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift);
 230
 231 inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
 232                                                float output_scale)
 233 {
 234   const double input_product_scale = static_cast<double>(input_scale * filter_scale);
 235   LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
 236   return input_product_scale / static_cast<double>(output_scale);
 237 }
 238
 239 // TODO rename getQuantizedConvolutionMultiplers to something more general
 240 // it is used for non conv operators too
 241 inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
 242                                                              const std::vector<float> &filter_scale,
 243                                                              float output_scale)
 244 {
 245   std::vector<double> effective_output_scales;
 246   size_t n = filter_scale.size();
 247   effective_output_scales.reserve(n);
 248   for (size_t i = 0; i < n; ++i)
 249   {
 250     effective_output_scales.push_back(
 251       getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
 252   }
 253   return effective_output_scales;
 254 }
 255
// One (multiplier, shift) pair; quantizeMultipliers() below fills one of these
// per effective scale by passing the two fields to quantizeMultiplier().
struct ChannelQuantMultipliers
{
  // Receives the `shift` output of quantizeMultiplier().
  int shift;
  // Receives the `quantized_multiplier` output of quantizeMultiplier().
  int32_t multiplier;
  // Defaulted constructor: the members carry no default initializers.
  ChannelQuantMultipliers() = default;
};
 262
 263 inline std::vector<ChannelQuantMultipliers>
 264 quantizeMultipliers(const std::vector<double> &effective_scale)
 265 {
 266   size_t n = effective_scale.size();
 267   std::vector<ChannelQuantMultipliers> params(n);
 268   for (size_t i = 0; i < n; ++i)
 269   {
 270     quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
 271   }
 272   return params;
 273 }
 274
 275 // A list of quantized tensors in a format that can be used by kernels like
 276 // split and concatenation.
 277 template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
 278 {
 279 public:
 280   using typename VectorOfTensors<uint8_t, is_const>::TensorT;
 281
 282   // Build with the tensors in 'tensor_list'.
 283   explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
 284     : VectorOfTensors<uint8_t, is_const>(tensor_list)
 285   {
 286     for (TensorT *tensor : tensor_list)
 287     {
 288       zero_point_.push_back(tensor->zero_point());
 289       scale_.push_back(tensor->scale());
 290     }
 291   }
 292
 293   const float *scale() const { return scale_.data(); }
 294   const int32_t *zero_point() const { return zero_point_.data(); }
 295
 296 private:
 297   std::vector<int32_t> zero_point_;
 298   std::vector<float> scale_;
 299 };
 300 #endif // DIS_QUANT
 301
 302 } // namespace kernels
 303 } // namespace luci_interpreter
 304
 305 #endif // LUCI_INTERPRETER_KERNELS_UTILS_H