compute/cker/include/cker/PortableTensorUtils.h

   1 /*
   2  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
  19 #define __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__
  20
#include "cker/Types.h"
#include "cker/neon/neon_check.h"
#include <ruy/context.h>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
  27
  28 namespace nnfw
  29 {
  30 namespace cker
  31 {
  32
  33 class ActivationFunctor
  34 {
  35 public:
  36   explicit ActivationFunctor(FusedActivationFunctionType act) : act_(act) {}
  37
  38   float operator()(float a) const
  39   {
  40     switch (act_)
  41     {
  42       case FusedActivationFunctionType::kNone:
  43         return a;
  44       case FusedActivationFunctionType::kRelu:
  45         return a < 0.f ? 0.f : a;
  46       case FusedActivationFunctionType::kRelu6:
  47         return std::max(0.f, std::min(a, 6.f));
  48       default:
  49         // TODO(aselle): More informative fatal error!
  50         exit(1);
  51     }
  52   }
  53
  54 private:
  55   FusedActivationFunctionType act_;
  56 };
  57
  58 void PortableVectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
  59                                      float *batch_vector)
  60 {
  61   for (int b = 0; b < n_batch; b++)
  62   {
  63     memcpy(batch_vector + b * v_size, vector, v_size * sizeof(float));
  64   }
  65 }
  66
  67 bool PortableIsZeroVector(const float *vector, int v_size)
  68 {
  69   for (int i = 0; i < v_size; ++i)
  70   {
  71     if (*vector++ != 0.0f)
  72       return false;
  73   }
  74   return true;
  75 }
  76
  77 void PortableApplyActivationToVector(const float *vector, int v_size,
  78                                      FusedActivationFunctionType activation, float *result)
  79 {
  80   auto activation_func = ActivationFunctor(activation);
  81   for (int v = 0; v < v_size; v++)
  82   {
  83     *result++ = (activation_func)(*vector++);
  84   }
  85 }
  86
  87 void PortableSymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
  88                                      float *min_value, float *max_value, float *scaling_factor)
  89 {
  90   auto minmax = std::minmax_element(values, values + size);
  91   *min_value = *minmax.first;
  92   *max_value = *minmax.second;
  93   const int kScale = 127;
  94   const float range = std::max(std::abs(*min_value), std::abs(*max_value));
  95   if (range == 0)
  96   {
  97     memset(quantized_values, 0, size * sizeof(int8_t));
  98     *scaling_factor = 1;
  99     return;
 100   }
 101   *scaling_factor = range / kScale;
 102   const float scaling_factor_inv = kScale / range;
 103   for (int i = 0; i < size; ++i)
 104   {
 105     const int32_t quantized_value =
 106         static_cast<int32_t>(std::round(values[i] * scaling_factor_inv));
 107     // Clamp: just in case some odd numeric offset.
 108     quantized_values[i] = std::min(kScale, std::max(-kScale, quantized_value));
 109   }
 110 }
 111
 112 void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
 113                                                  const int m_rows, const int m_cols,
 114                                                  const int8_t *__restrict__ vectors,
 115                                                  const float *scaling_factors, int n_batch,
 116                                                  float *__restrict__ result, int result_stride)
 117 {
 118   int batch, row, col;
 119   for (batch = 0; batch < n_batch; ++batch, vectors += m_cols)
 120   {
 121     const float batch_scaling_factor = scaling_factors[batch];
 122     // Get the address of the first row.
 123     const int8_t *row_ptr = matrix;
 124     for (row = 0; row < m_rows; ++row, result += result_stride)
 125     {
 126       // Initialize the dot product sum for the row to 0.
 127       int32_t dotprod = 0;
 128 #if defined(__GNUC__)
 129       // Prefetch the row to cache.
 130       __builtin_prefetch(row_ptr, 0 /* prefetch for read */, 3 /* temporal locality */);
 131 #endif
 132       for (col = 0; col < m_cols; ++col, ++row_ptr)
 133       {
 134         dotprod += (*row_ptr) * (vectors[col]);
 135       } // for col
 136       *result += (dotprod * batch_scaling_factor);
 137     } // for row
 138   }   // for batch
 139 }
 140
 141 void PortableMatrixBatchVectorMultiplyAccumulate(const int8_t *__restrict__ matrix,
 142                                                  const int m_rows, const int m_cols,
 143                                                  const int8_t *__restrict__ vector,
 144                                                  const float *scaling_factors, int n_batch,
 145                                                  int32_t *, float *__restrict__ result,
 146                                                  int result_stride, ruy::Context *)
 147 {
 148   PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, scaling_factors,
 149                                               n_batch, result, result_stride);
 150 }
 151
 152 void PortableMatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
 153                                                  const float *vector, int n_batch, float *result,
 154                                                  int result_stride)
 155 {
 156   float *result_in_batch = result;
 157   for (int b = 0; b < n_batch; b++)
 158   {
 159     const float *matrix_ptr = matrix;
 160     for (int r = 0; r < m_rows; r++)
 161     {
 162       float dot_prod = 0.0f;
 163       const float *vector_in_batch = vector + b * m_cols;
 164       for (int c = 0; c < m_cols; c++)
 165       {
 166         dot_prod += *matrix_ptr++ * *vector_in_batch++;
 167       }
 168       *result_in_batch += dot_prod;
 169       result_in_batch += result_stride;
 170     }
 171   }
 172 }
 173
 174 void PortableZeroVector(float *vector, int v_size) { std::fill_n(vector, v_size, 0); }
 175
 176 } // namespace cker
 177 } // namespace nnfw
 178
 179 #endif // __NNFW_CKER_PORTABLE_TENSOR_UTILS_H__