/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_UTILS_H__
#define __NNFW_CKER_UTILS_H__

#include "Shape.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>

#include <fixedpoint/fixedpoint.h>

namespace nnfw
{
namespace cker
{

template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
{
  return std::min<T>(std::max<T>(x, output_activation_min), output_activation_max);
}

inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
{
  int left_shift = shift > 0 ? shift : 0;
  int right_shift = shift > 0 ? 0 : -shift;
  return gemmlowp::RoundingDivideByPOT(
    gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
    right_shift);
}
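
// Example (illustrative): the real scale 0.375 = 0.75 * 2^-1 is encoded as
// quantized_multiplier = round(0.75 * 2^31) = 1610612736 with shift = -1, so
// MultiplyByQuantizedMultiplier(100, 1610612736, -1) yields
// round(100 * 0.375) = 38 using only integer operations. This is how quantized
// kernels apply real-valued rescaling factors without floating point.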

inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(int32_t x, int32_t quantized_multiplier,
                                                           int left_shift)
{
  return gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier);
}

inline int NodeOffset(int b, int h, int w, int height, int width)
{
  return (b * height + h) * width + w;
}

inline int CountLeadingZeros(uint32_t integer_input)
{
  // Guard against zero input: the shift loop below would otherwise not
  // terminate. Returning 32 matches the usual clz convention.
  if (integer_input == 0)
  {
    return 32;
  }
  const uint32_t one_in_leading_positive = 1U << 31;
  int leading_zeros = 0;
  while (integer_input < one_in_leading_positive)
  {
    integer_input <<= 1;
    ++leading_zeros;
  }
  return leading_zeros;
}
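
// For example, CountLeadingZeros(1) == 31 and CountLeadingZeros(0x80000000u) == 0.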

// Comment from tensorflow lite:
//
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
// rectangular array of numbers.
//
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
// to enable simple unoptimized implementations of element-wise broadcasting
// operations.
template <int N> struct NdArrayDesc
{
  // The "extent" of each dimension. Indices along dimension d must be in the
  // half-open interval [0, extents[d]).
  int extents[N];

  // The number of *elements* (not bytes) between consecutive indices of each
  // dimension.
  int strides[N];
};

// Comment from tensorflow lite:
//
// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
inline int SubscriptToIndex(const NdArrayDesc<4> &desc, int i0, int i1, int i2, int i3)
{
  assert(i0 >= 0 && i0 < desc.extents[0]);
  assert(i1 >= 0 && i1 < desc.extents[1]);
  assert(i2 >= 0 && i2 < desc.extents[2]);
  assert(i3 >= 0 && i3 < desc.extents[3]);
  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + i3 * desc.strides[3];
}
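
// Example (illustrative): a contiguous row-major 4-D array of shape 2x3x4x5
// is described by extents = {2, 3, 4, 5} and strides = {60, 20, 5, 1};
// SubscriptToIndex(desc, 1, 0, 0, 2) then yields
// 1 * 60 + 0 * 20 + 0 * 5 + 2 * 1 = 62.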

template <int N>
inline void NdArrayDescsForElementwiseBroadcast(const Shape &input0_shape,
                                                const Shape &input1_shape,
                                                NdArrayDesc<N> *desc0_out, NdArrayDesc<N> *desc1_out)
{
  assert(desc0_out != nullptr);
  assert(desc1_out != nullptr);

  auto extended_input0_shape = Shape::ExtendedShape(N, input0_shape);
  auto extended_input1_shape = Shape::ExtendedShape(N, input1_shape);

  // Copy dims to desc, calculating strides.
  int desc0_stride = 1;
  int desc1_stride = 1;
  for (int i = N - 1; i >= 0; --i)
  {
    desc0_out->extents[i] = extended_input0_shape.Dims(i);
    desc0_out->strides[i] = desc0_stride;
    desc0_stride *= extended_input0_shape.Dims(i);
    desc1_out->extents[i] = extended_input1_shape.Dims(i);
    desc1_out->strides[i] = desc1_stride;
    desc1_stride *= extended_input1_shape.Dims(i);
  }

  // Walk over each dimension. If the extents are equal do nothing.
  // Otherwise, set the desc with extent 1 to have extent equal to the other and
  // stride 0.
  for (int i = 0; i < N; ++i)
  {
    const int extent0 = extended_input0_shape.Dims(i);
    const int extent1 = extended_input1_shape.Dims(i);
    if (extent0 != extent1)
    {
      if (extent0 == 1)
      {
        desc0_out->strides[i] = 0;
        desc0_out->extents[i] = extent1;
      }
      else
      {
        assert(extent1 == 1);
        desc1_out->strides[i] = 0;
        desc1_out->extents[i] = extent0;
      }
    }
  }
}
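
// Example (illustrative): broadcasting a {1, 3} bias against a {2, 3} matrix
// with N = 2 leaves desc1 untouched and rewrites desc0 to extents = {2, 3},
// strides = {0, 1}, so SubscriptToIndex re-reads the same bias row for every
// value of the first index.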

// Gets next index to iterate through a multidimensional array.
inline bool NextIndex(const int num_dims, const int *dims, int *current)
{
  if (num_dims == 0)
  {
    return false;
  }
  assert(dims != nullptr);
  assert(current != nullptr);
  int carry = 1;
  for (int idx = num_dims - 1; idx >= 0; --idx)
  {
    int current_val = current[idx] + carry;
    assert(dims[idx] >= current_val);
    if (dims[idx] == current_val)
    {
      // This dimension wraps around; keep the carry for the next one.
      current[idx] = 0;
    }
    else
    {
      current[idx] = current_val;
      carry = 0;
      break;
    }
  }
  return (carry == 0);
}
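
// Example (illustrative): with dims = {2, 2}, starting from current = {0, 0},
// repeated calls visit {0, 1}, {1, 0}, {1, 1} in odometer order and then
// return false once every dimension has wrapped around.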

// Gets offset of index if reducing on axis. When reducing, the flattened offset
// will not change if the input index changes on the given axis. For example,
// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
// offset.
// TODO(kanlig): use Dims to represent dimensions.
inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
                                  const int num_axis, const int *axis)
{
  if (num_dims == 0)
  {
    return 0;
  }
  assert(dims != nullptr);
  assert(index != nullptr);
  size_t offset = 0;
  for (int idx = 0; idx < num_dims; ++idx)
  {
    // if we need to skip this axis
    bool is_axis = false;
    if (axis != nullptr)
    {
      for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
      {
        if (idx == axis[axis_idx])
        {
          is_axis = true;
          break;
        }
      }
    }
    if (!is_axis)
    {
      offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
    }
  }
  return offset;
}
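
// Example (illustrative): for dims = {2, 3, 4} with axis = {0} reduced, both
// index (0, 1, 2) and index (1, 1, 2) give offset 1 * 4 + 2 = 6, i.e. the
// flattened position in the reduced 3x4 output.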

template <typename T> void optimized_ops_preload_l1_keep(const T *ptr)
{
#ifdef __GNUC__
  // builtin offered by GCC-compatible compilers including clang
  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
#else
  (void)ptr;
#endif
}

// Writes randomly accessed values from `input` sequentially into `output`.
template <typename T> class SequentialTensorWriter
{
public:
  SequentialTensorWriter(const T *input_data, T *output_data)
    : input_data_(input_data), output_ptr_(output_data)
  {
  }

  void Write(int position) { *output_ptr_++ = input_data_[position]; }
  void WriteN(int position, int len)
  {
    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
    output_ptr_ += len;
  }

private:
  const T *input_data_;
  T *output_ptr_;
};
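
// Example (illustrative): gathering two 3-element rows in reverse order.
//
//   const float input[6] = {0, 1, 2, 3, 4, 5}; // rows {0,1,2} and {3,4,5}
//   float output[6];
//   SequentialTensorWriter<float> writer(input, output);
//   writer.WriteN(3, 3); // copy row 1 (elements 3..5)
//   writer.WriteN(0, 3); // then row 0 (elements 0..2)
//   // output now holds {3, 4, 5, 0, 1, 2}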

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_UTILS_H__