/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
#define __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__

#include "cker/Types.h"
#include "cker/neon/neon_check.h"
#include <ruy/context.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>

namespace nnfw
{
namespace cker
{
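
// Applies a fused activation function to a single float value; used by the
// portable kernels below.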
class ActivationFunctor
{
public:
  explicit ActivationFunctor(FusedActivationFunctionType act) : act_(act) {}

  float operator()(float a) const
  {
    switch (act_)
    {
      case FusedActivationFunctionType::kNone:
        return a;
      case FusedActivationFunctionType::kRelu:
        return a < 0.f ? 0.f : a;
      case FusedActivationFunctionType::kRelu6:
        return std::max(0.f, std::min(a, 6.f));
      case FusedActivationFunctionType::kTanh:
        return std::tanh(a);
      case FusedActivationFunctionType::kSigmoid:
        return 1.0f / (1.0f + std::exp(-a));
      default:
        // TODO(aselle): More informative fatal error!
        exit(1);
    }
  }

private:
  FusedActivationFunctionType act_;
};
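
// Clamps each element of `vector` to [-clipping_value, clipping_value] in place.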
template <typename T>
void PortableCwiseClipping(T *vector, const int v_size, const T clipping_value)
{
  for (int i = 0; i < v_size; i++)
  {
    vector[i] = std::max(std::min(clipping_value, vector[i]), static_cast<T>(-clipping_value));
  }
}
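
// Copies `vector` (length v_size) into each of the n_batch rows of `batch_vector`.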
inline void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
                                            float *batch_vector)
{
  for (int b = 0; b < n_batch; b++)
  {
    memcpy(batch_vector + b * v_size, vector, v_size * sizeof(float));
  }
}
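
// Adds `vector` element-wise into each of the n_batch rows of `batch_vector`.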
inline void PortableVectorBatchVectorAdd(const float *vector, int v_size, int n_batch,
                                         float *batch_vector)
{
  for (int b = 0; b < n_batch; b++)
  {
    for (int i = 0; i < v_size; ++i)
    {
      batch_vector[i] += vector[i];
    }
    batch_vector += v_size;
  }
}
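
// Returns true if every element of `vector` is exactly zero.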
inline bool PortableIsZeroVector(const float *vector, int v_size)
{
  for (int i = 0; i < v_size; ++i)
  {
    if (*vector++ != 0.0f)
      return false;
  }
  return true;
}
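
// Applies the given fused activation to each element of `vector`, writing into `result`.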
inline void PortableApplyActivationToVector(const float *vector, int v_size,
                                            FusedActivationFunctionType activation, float *result)
{
  auto activation_func = ActivationFunctor(activation);
  for (int v = 0; v < v_size; v++)
  {
    *result++ = (activation_func)(*vector++);
  }
}
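
// Computes result[i] = 1.0f - vector[i] for each element.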
inline void PortableSub1Vector(const float *vector, int v_size, float *result)
{
  for (int v = 0; v < v_size; v++)
  {
    *result++ = 1.0f - *vector++;
  }
}
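
// Symmetric per-tensor quantization: maps floats to int8 in [-127, 127] using
// scale = max(|min|, |max|) / 127, so quantized_values[i] * scale ~= values[i].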
inline void PortableSymmetricQuantizeFloats(const float *values, const int size,
                                            int8_t *quantized_values, float *min_value,
                                            float *max_value, float *scaling_factor)
{
  auto minmax = std::minmax_element(values, values + size);
  *min_value = *minmax.first;
  *max_value = *minmax.second;
  const int kScale = 127;
  const float range = std::max(std::abs(*min_value), std::abs(*max_value));
  if (range == 0)
  {
    // All-zero input: nothing to scale.
    memset(quantized_values, 0, size * sizeof(int8_t));
    *scaling_factor = 1;
    return;
  }
  *scaling_factor = range / kScale;
  const float scaling_factor_inv = kScale / range;
  for (int i = 0; i < size; ++i)
  {
    const int32_t quantized_value =
      static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
    // Clamp: just in case some odd numeric offset.
    quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }
}
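
// Illustrative usage sketch (values assumed for the example, not part of the header):
//
//   float input[4] = {-1.0f, -0.5f, 0.5f, 1.0f};
//   int8_t q[4];
//   float min_v, max_v, scale;
//   PortableSymmetricQuantizeFloats(input, 4, q, &min_v, &max_v, &scale);
//   // scale == 1.0f / 127; q[i] * scale recovers input[i] up to rounding error.

// Multiplies an (m_rows x m_cols) int8 matrix by n_batch int8 vectors, scales
// each row dot product by the batch's scaling factor, and accumulates into
// `result`, stepping `result` by `result_stride` between output elements.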
inline void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
                                                        const int m_rows, const int m_cols,
                                                        const int8_t *__restrict__ vectors,
                                                        const float *scaling_factors, int n_batch,
                                                        float *__restrict__ result,
                                                        int result_stride)
{
  int batch, row, col;
  for (batch = 0; batch < n_batch; ++batch, vectors += m_cols)
  {
    const float batch_scaling_factor = scaling_factors[batch];
    // Get the address of the first row.
    const int8_t *row_ptr = matrix;
    for (row = 0; row < m_rows; ++row, result += result_stride)
    {
      // Initialize the dot product sum for the row to 0.
      int32_t dotprod = 0;
#if defined(__GNUC__)
      // Prefetch the row to cache.
      __builtin_prefetch(row_ptr, 0 /* prefetch for read */, 3 /* temporal locality */);
#endif
      for (col = 0; col < m_cols; ++col, ++row_ptr)
      {
        dotprod += (*row_ptr) * (vectors[col]);
      }
      *result += (dotprod * batch_scaling_factor);
    }
  }
}
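
// Overload matching the hybrid-kernel signature; the extra int32_t buffer and
// ruy::Context parameters are unused in the portable path.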
inline void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
                                                        const int m_rows, const int m_cols,
                                                        const int8_t *__restrict__ vector,
                                                        const float *scaling_factors, int n_batch,
                                                        int32_t *, float *__restrict__ result,
                                                        int result_stride, ruy::Context *)
{
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, scaling_factors,
                                              n_batch, result, result_stride);
}
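
// Float variant: accumulates matrix-vector products for each batch into `result`.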
inline void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
                                                        const float *vector, int n_batch,
                                                        float *result, int result_stride)
{
  float *result_in_batch = result;
  for (int b = 0; b < n_batch; b++)
  {
    const float *matrix_ptr = matrix;
    for (int r = 0; r < m_rows; r++)
    {
      float dot_prod = 0.0f;
      const float *vector_in_batch = vector + b * m_cols;
      for (int c = 0; c < m_cols; c++)
      {
        dot_prod += *matrix_ptr++ * *vector_in_batch++;
      }
      *result_in_batch += dot_prod;
      result_in_batch += result_stride;
    }
  }
}
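
// Normalizes each batch row of `input_vector` to zero mean and unit standard
// deviation, adding a small constant to the variance to avoid division by zero.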
inline void PortableMeanStddevNormalization(const float *input_vector, float *output_vector,
                                            int v_size, int n_batch)
{
  for (int batch = 0; batch < n_batch; ++batch)
  {
    float sum = 0.0f;
    for (int i = 0; i < v_size; ++i)
    {
      sum += input_vector[i];
    }
    const float mean = sum / v_size;
    float sum_diff_sq = 0.0f;
    for (int i = 0; i < v_size; ++i)
    {
      const float diff = input_vector[i] - mean;
      sum_diff_sq += diff * diff;
    }
    const float variance = sum_diff_sq / v_size;
    constexpr float kNormalizationConstant = 1e-8f;
    const float stddev_inv = 1.0f / std::sqrt(variance + kNormalizationConstant);
    for (int i = 0; i < v_size; ++i)
    {
      output_vector[i] = (input_vector[i] - mean) * stddev_inv;
    }
    input_vector += v_size;
    output_vector += v_size;
  }
}
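
// Fills the first v_size elements of `vector` with zeros.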
inline void PortableZeroVector(float *vector, int v_size) { std::fill_n(vector, v_size, 0.0f); }

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__