compute/cker/include/cker/operation/SoftMax.h

   1 /*
   2  * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
   3  * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 #ifndef __NNFW_CKER_SOFTMAX_H__
  19 #define __NNFW_CKER_SOFTMAX_H__
  20
  21 #include "cker/Shape.h"
  22 #include "cker/Utils.h"
  23 #include "cker/Types.h"
  24 #include "cker/eigen/Utils.h"
  25
  26 #include <Eigen/Core>
  27 #include <fixedpoint/fixedpoint.h>
  28 #include <cmath>
  29
  30 namespace nnfw
  31 {
  32 namespace cker
  33 {
  34
  35 // Performs softmax along the input of size (input_size * batch_size).
  36 inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
  37                     float *out)
  38 {
  39   assert(input_size > 0);
  40
  41   // For each batch
  42   for (int b = 0; b < batch_size; b++)
  43   {
  44     // Find the max coeff.
  45     float max_coeff = in[0];
  46     for (int i = 1; i < input_size; i++)
  47     {
  48       if (in[i] > max_coeff)
  49         max_coeff = in[i];
  50     }
  51
  52     // Compute the normalized sum of exps.
  53     float exp_sum = 0.0;
  54     for (int i = 0; i < input_size; i++)
  55     {
  56       out[i] = std::exp((in[i] - max_coeff) * beta);
  57       exp_sum += out[i];
  58     }
  59
  60     // Divide by the sum of exps.
  61     float reciprocal_sum_exp = 1.f / exp_sum;
  62     for (int i = 0; i < input_size; i++)
  63     {
  64       out[i] *= reciprocal_sum_exp;
  65     }
  66
  67     // Advance in and out pointers for the next batch.
  68     in += input_size;
  69     out += input_size;
  70   }
  71 }
  72
  73 inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
  74                     const Shape &output_shape, float *output_data)
  75 {
  76   // Validate whether if shapes of input and output are the same
  77   MatchingFlatSize(input_shape, output_shape);
  78
  79   const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
  80   auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
  81   // Compute the exponential first, removing the max coefficient for numerical
  82   // stability.
  83   out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta;
  84   // We are separating out the exp function so that exp can be vectorized.
  85   out_mat = out_mat.array().exp();
  86   // Normalize to get the activations.
  87   Eigen::Array<float, 1, Eigen::Dynamic> scale = out_mat.array().colwise().sum().inverse();
  88   out_mat.array().rowwise() *= scale;
  89 }
  90
  91 inline void Softmax(const SoftmaxParams &params, const Shape &input_shape,
  92                     const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
  93 {
  94   const int32_t input_beta_multiplier = params.input_multiplier;
  95   const int32_t input_beta_left_shift = params.input_left_shift;
  96   const int diff_min = params.diff_min;
  97   // The representation chosen for the input to the exp() function is Q5.26.
  98   // We need to leave extra space since values that we skip might be as large as
  99   // -32 before multiplying by input_beta_multiplier, and therefore as large as
 100   // -16 afterwards.  Note that exp(-8) is definitely not insignificant to
 101   // accumulation, but exp(-16) definitely is.
 102   static const int kScaledDiffIntegerBits = 5;
 103   static const int kAccumulationIntegerBits = 12;
 104   using FixedPointScaledDiff = gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
 105   using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
 106   using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
 107
 108   const int trailing_dim = input_shape.DimensionsCount() - 1;
 109   const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
 110   const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
 111
 112   for (int i = 0; i < outer_size; ++i)
 113   {
 114     uint8_t max_in_row = 0;
 115     for (int c = 0; c < depth; ++c)
 116     {
 117       max_in_row = std::max(max_in_row, input_data[i * depth + c]);
 118     }
 119
 120     FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
 121     for (int c = 0; c < depth; ++c)
 122     {
 123       int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
 124       if (input_diff >= diff_min)
 125       {
 126         const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
 127             input_diff, input_beta_multiplier, input_beta_left_shift);
 128         const FixedPointScaledDiff scaled_diff_f8 =
 129             FixedPointScaledDiff::FromRaw(input_diff_rescaled);
 130         sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
 131                                         exp_on_negative_values(scaled_diff_f8));
 132       }
 133     }
 134
 135     int32_t fixed_sum_of_exps = sum_of_exps.raw();
 136     int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(fixed_sum_of_exps));
 137     // This is the number of bits to the left of the binary point above 1.0.
 138     // Consider fixed_sum_of_exps=1.25.  In that case shifted_scale=0.8 and
 139     // no later adjustment will be needed.
 140     int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
 141     int32_t shifted_sum_minus_one =
 142         static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
 143                              (static_cast<uint32_t>(1) << 31));
 144
 145     FixedPoint0 shifted_scale =
 146         one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
 147
 148     for (int c = 0; c < depth; ++c)
 149     {
 150       int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
 151       if (input_diff >= diff_min)
 152       {
 153         const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
 154             input_diff, input_beta_multiplier, input_beta_left_shift);
 155         const FixedPointScaledDiff scaled_diff_f8 =
 156             FixedPointScaledDiff::FromRaw(input_diff_rescaled);
 157
 158         FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
 159         int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(),
 160                                                              num_bits_over_unit + 31 - 8);
 161
 162         output_data[i * depth + c] = static_cast<uint8_t>(
 163             std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
 164       }
 165       else
 166       {
 167         output_data[i * depth + c] = 0;
 168       }
 169     }
 170   }
 171 }
 172
 173 } // namespace cker
 174 } // namespace nnfw
 175
 176 #endif // __NNFW_CKER_SOFTMAX_H__