/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_UTILS_H__
#define __NNFW_CKER_UTILS_H__

#include "Shape.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>

#include <fixedpoint/fixedpoint.h>

namespace nnfw
{
namespace cker
{

template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
{
  return std::min<T>(std::max<T>(x, output_activation_min), output_activation_max);
}

inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
{
  int left_shift = shift > 0 ? shift : 0;
  int right_shift = shift > 0 ? 0 : -shift;
  return gemmlowp::RoundingDivideByPOT(
    gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
    right_shift);
}
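
// Example (illustrative): the real scale 0.375 = 0.75 * 2^-1 is encoded as
// quantized_multiplier = round(0.75 * 2^31) = 1610612736 with shift = -1, so
// MultiplyByQuantizedMultiplier(100, 1610612736, -1) yields
// round(100 * 0.375) = 38 using only integer operations. This is how quantized
// kernels apply real-valued rescaling factors without floating point.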

inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(int32_t x, int32_t quantized_multiplier,
                                                           int left_shift)
{
  return gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier);
}

inline int NodeOffset(int b, int h, int w, int height, int width)
{
  return (b * height + h) * width + w;
}

inline int CountLeadingZeros(uint32_t integer_input)
{
  // Guard against zero input: the shift loop below would otherwise not
  // terminate. Returning 32 matches the usual clz convention.
  if (integer_input == 0)
  {
    return 32;
  }
  const uint32_t one_in_leading_positive = 1U << 31;
  int leading_zeros = 0;
  while (integer_input < one_in_leading_positive)
  {
    integer_input <<= 1;
    ++leading_zeros;
  }
  return leading_zeros;
}
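
// For example, CountLeadingZeros(1) == 31 and CountLeadingZeros(0x80000000u) == 0.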

// Comment from tensorflow lite:
//
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
// rectangular array of numbers.
//
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
// to enable simple unoptimized implementations of element-wise broadcasting
// operations.
template <int N> struct NdArrayDesc
{
  // The "extent" of each dimension. Indices along dimension d must be in the
  // half-open interval [0, extents[d]).
  int extents[N];

  // The number of *elements* (not bytes) between consecutive indices of each
  // dimension.
  int strides[N];
};

// Comment from tensorflow lite:
//
// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
inline int SubscriptToIndex(const NdArrayDesc<4> &desc, int i0, int i1, int i2, int i3)
{
  assert(i0 >= 0 && i0 < desc.extents[0]);
  assert(i1 >= 0 && i1 < desc.extents[1]);
  assert(i2 >= 0 && i2 < desc.extents[2]);
  assert(i3 >= 0 && i3 < desc.extents[3]);
  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + i3 * desc.strides[3];
}
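
// Example (illustrative): a contiguous row-major 4-D array of shape 2x3x4x5
// is described by extents = {2, 3, 4, 5} and strides = {60, 20, 5, 1};
// SubscriptToIndex(desc, 1, 0, 0, 2) then yields
// 1 * 60 + 0 * 20 + 0 * 5 + 2 * 1 = 62.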

template <int N>
inline void NdArrayDescsForElementwiseBroadcast(const Shape &input0_shape,
                                                const Shape &input1_shape,
                                                NdArrayDesc<N> *desc0_out, NdArrayDesc<N> *desc1_out)
{
  assert(desc0_out != nullptr);
  assert(desc1_out != nullptr);

  auto extended_input0_shape = Shape::ExtendedShape(N, input0_shape);
  auto extended_input1_shape = Shape::ExtendedShape(N, input1_shape);

  // Copy dims to desc, calculating strides.
  int desc0_stride = 1;
  int desc1_stride = 1;
  for (int i = N - 1; i >= 0; --i)
  {
    desc0_out->extents[i] = extended_input0_shape.Dims(i);
    desc0_out->strides[i] = desc0_stride;
    desc0_stride *= extended_input0_shape.Dims(i);
    desc1_out->extents[i] = extended_input1_shape.Dims(i);
    desc1_out->strides[i] = desc1_stride;
    desc1_stride *= extended_input1_shape.Dims(i);
  }

  // Walk over each dimension. If the extents are equal do nothing.
  // Otherwise, set the desc with extent 1 to have extent equal to the other and
  // stride 0.
  for (int i = 0; i < N; ++i)
  {
    const int extent0 = extended_input0_shape.Dims(i);
    const int extent1 = extended_input1_shape.Dims(i);
    if (extent0 != extent1)
    {
      if (extent0 == 1)
      {
        desc0_out->strides[i] = 0;
        desc0_out->extents[i] = extent1;
      }
      else
      {
        assert(extent1 == 1);
        desc1_out->strides[i] = 0;
        desc1_out->extents[i] = extent0;
      }
    }
  }
}
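
// Example (illustrative): broadcasting a {1, 3} bias against a {2, 3} matrix
// with N = 2 leaves desc1 untouched and rewrites desc0 to extents = {2, 3},
// strides = {0, 1}, so SubscriptToIndex re-reads the same bias row for every
// value of the first index.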

// Gets next index to iterate through a multidimensional array.
inline bool NextIndex(const int num_dims, const int *dims, int *current)
{
  if (num_dims == 0)
  {
    return false;
  }
  assert(dims != nullptr);
  assert(current != nullptr);
  int carry = 1;
  for (int idx = num_dims - 1; idx >= 0; --idx)
  {
    int current_val = current[idx] + carry;
    assert(dims[idx] >= current_val);
    if (dims[idx] == current_val)
    {
      // This dimension wraps around; keep the carry for the next one.
      current[idx] = 0;
    }
    else
    {
      current[idx] = current_val;
      carry = 0;
      break;
    }
  }
  return (carry == 0);
}
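
// Example (illustrative): with dims = {2, 2}, starting from current = {0, 0},
// repeated calls visit {0, 1}, {1, 0}, {1, 1} in odometer order and then
// return false once every dimension has wrapped around.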

// Gets offset of index if reducing on axis. When reducing, the flattened offset
// will not change if the input index changes on the given axis. For example,
// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
// offset.
// TODO(kanlig): use Dims to represent dimensions.
inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
                                  const int num_axis, const int *axis)
{
  if (num_dims == 0)
  {
    return 0;
  }
  assert(dims != nullptr);
  assert(index != nullptr);
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx)
  {
    // if we need to skip this axis
    bool is_axis = false;
    if (axis != nullptr)
    {
      for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
      {
        if (idx == axis[axis_idx])
        {
          is_axis = true;
          break;
        }
      }
    }
    if (!is_axis)
    {
      offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
    }
  }
  return offset;
}
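
// Example (illustrative): for dims = {2, 3, 4} with axis = {0} reduced, both
// index (0, 1, 2) and index (1, 1, 2) give offset 1 * 4 + 2 = 6, i.e. the
// flattened position in the reduced 3x4 output.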

template <typename T> void optimized_ops_preload_l1_keep(const T *ptr)
{
#ifdef __GNUC__
  // builtin offered by GCC-compatible compilers including clang
  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
#else
  (void)ptr;
#endif
}

// Writes randomly accessed values from `input` sequentially into `output`.
template <typename T> class SequentialTensorWriter
{
public:
  SequentialTensorWriter(const T *input_data, T *output_data)
    : input_data_(input_data), output_ptr_(output_data)
  {
  }

  void Write(int position) { *output_ptr_++ = input_data_[position]; }
  void WriteN(int position, int len)
  {
    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
    output_ptr_ += len;
  }

private:
  const T *input_data_;
  T *output_ptr_;
};
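
// Example (illustrative): gathering two 3-element rows in reverse order.
//
//   const float input[6] = {0, 1, 2, 3, 4, 5}; // rows {0,1,2} and {3,4,5}
//   float output[6];
//   SequentialTensorWriter<float> writer(input, output);
//   writer.WriteN(3, 3); // copy row 1 (elements 3..5)
//   writer.WriteN(0, 3); // then row 0 (elements 0..2)
//   // output now holds {3, 4, 5, 0, 1, 2}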

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_UTILS_H__