onert-micro/luci-interpreter/pal/common/PALUtils.h

   1 /*
   2  * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *    http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef LUCI_INTERPRETER_PAL_UTILS_H
  19 #define LUCI_INTERPRETER_PAL_UTILS_H
  20
  21 #include <cassert>
  22
  23 namespace luci_interpreter_pal
  24 {
  25
// Lookup table of sigmoid(i / 24) for i = 0..255, stored in unsigned 0.16
// fixed-point format (the real value scaled by 2^16, so sigmoid(0) = 0.5 is
// stored as 32768 and the last entry is the largest representable value 65535).
// The table is shared by sigmoid and tanh, since
// tanh(x) = 2*sigmoid(2*x) - 1.
// Both functions are symmetric, so the table is only needed
// for the absolute value of the input.
// NOTE(review): uint16_t comes from <cstdint>, which this header does not
// include itself (only <cassert> is included here) - confirm that every
// includer provides it.
static const uint16_t sigmoid_table_uint16[256] = {
  32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498, 40149, 40794, 41432,
  42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255, 46817, 47369, 47911, 48443, 48964, 49475,
  49975, 50464, 50942, 51409, 51865, 52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485,
  55834, 56174, 56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288, 59519,
  59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441, 61599, 61750, 61896, 62036,
  62172, 62302, 62428, 62549, 62666, 62778, 62886, 62990, 63090, 63186, 63279, 63368, 63454, 63536,
  63615, 63691, 63765, 63835, 63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405,
  64450, 64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845, 64873, 64900,
  64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097, 65115, 65132, 65149, 65164, 65179,
  65194, 65208, 65221, 65234, 65246, 65258, 65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337,
  65345, 65352, 65360, 65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
  65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465, 65468, 65471, 65474,
  65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491, 65493, 65495, 65497, 65498, 65500, 65501,
  65503, 65504, 65505, 65507, 65508, 65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517,
  65517, 65518, 65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525, 65525,
  65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529, 65529, 65529, 65530, 65530,
  65530, 65530, 65531, 65531, 65531, 65531, 65531, 65532, 65532, 65532, 65532, 65532, 65532, 65533,
  65533, 65533, 65533, 65533, 65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
  65534, 65534, 65534, 65535};
  51
  52 inline std::int32_t saturatingRoundingDoublingHighMul(std::int32_t a, std::int32_t b)
  53 {
  54   bool overflow = a == b && a == std::numeric_limits<std::int32_t>::min();
  55   std::int64_t a_64(a);
  56   std::int64_t b_64(b);
  57   std::int64_t ab_64 = a_64 * b_64;
  58   std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
  59   std::int32_t ab_x2_high32 = static_cast<std::int32_t>((ab_64 + nudge) / (1ll << 31));
  60   return overflow ? std::numeric_limits<std::int32_t>::max() : ab_x2_high32;
  61 }
  62
  63 // Correctly-rounded-to-nearest division by a power-of-two.
  64 // Also known as a rounding arithmetic right shift.
  65 inline int32_t roundingDivideByPOT(int32_t x, int32_t exponent)
  66 {
  67   assert(exponent >= 0);
  68   assert(exponent <= 31);
  69   const int32_t mask = int32_t((1ll << exponent) - 1);
  70   const int32_t zero = int32_t(0);
  71   const int32_t one = int32_t(1);
  72   const int32_t remainder = x & mask;
  73   const int32_t threshold = (mask >> 1) + ((x < zero ? one : zero) & one);
  74   return (x >> exponent) + ((remainder > threshold ? one : zero) & one);
  75 }
  76
  77 inline int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
  78 {
  79   int left_shift = shift > 0 ? shift : 0;
  80   int right_shift = shift > 0 ? 0 : -shift;
  81   return roundingDivideByPOT(
  82     saturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift);
  83 }
  84
  85 inline int32_t multiplyByQuantizedMultiplierSmallerThanOneExp(int32_t x,
  86                                                               int32_t quantized_multiplier,
  87                                                               int left_shift)
  88 {
  89   return roundingDivideByPOT(saturatingRoundingDoublingHighMul(x, quantized_multiplier),
  90                              -left_shift);
  91 }
  92
  93 template <typename P> inline void getActivationParams(const P &params, int32_t *min, int32_t *max)
  94 {
  95   *min = params.quantized_activation_min;
  96   *max = params.quantized_activation_max;
  97 }
  98
  99 template <typename P> inline void getActivationParams(const P &params, float *min, float *max)
 100 {
 101   *min = params.float_activation_min;
 102   *max = params.float_activation_max;
 103 }
 104
 105 template <typename P> inline void getActivationParams(const P &params, int64_t *min, int64_t *max)
 106 {
 107   *min = params.int64_activation_min;
 108   *max = params.int64_activation_max;
 109 }
 110
 111 // Gets offset of index if reducing on axis. When reducing, the flattened offset
 112 // will not change, if the input index changes on the given axis. For example,
 113 // if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
 114 // then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
 115 // offset.
 116 inline size_t reducedOutputOffset(const int num_dims, const int *dims, const int *index,
 117                                   const int num_axis, const int *axis)
 118 {
 119   if (num_dims == 0)
 120   {
 121     return 0;
 122   }
 123   size_t offset = 0;
 124   for (int idx = 0; idx < num_dims; ++idx)
 125   {
 126     // if we need to skip this axis
 127     bool is_axis = false;
 128     if (axis != nullptr)
 129     {
 130       for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
 131       {
 132         if (idx == axis[axis_idx])
 133         {
 134           is_axis = true;
 135           break;
 136         }
 137       }
 138     }
 139     if (!is_axis)
 140     {
 141       offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
 142     }
 143   }
 144   return offset;
 145 }
 146
 147 // Gets next index to iterate through a multidimensional array.
 148 inline bool nextIndex(const int num_dims, const int *dims, int *current)
 149 {
 150   if (num_dims == 0)
 151   {
 152     return false;
 153   }
 154   int carry = 1;
 155   for (int idx = num_dims - 1; idx >= 0; --idx)
 156   {
 157     int current_val = current[idx] + carry;
 158     if (dims[idx] == current_val)
 159     {
 160       current[idx] = 0;
 161     }
 162     else
 163     {
 164       current[idx] = current_val;
 165       carry = 0;
 166       break;
 167     }
 168   }
 169   return (carry == 0);
 170 }
 171
 172 // Get common shape dim, assert that they all agree.
 173 inline int MatchingDim(const luci_interpreter::RuntimeShape &shape1, int index1,
 174                        const luci_interpreter::RuntimeShape &shape2, int index2)
 175 {
 176   assert(shape1.dims(index1) == shape2.dims(index2));
 177   return shape1.dims(index1);
 178 }
 179
 180 } // namespace luci_interpreter_pal
 181
 182 #endif // LUCI_INTERPRETER_PAL_UTILS_H