/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __NNFW_CKER_SOFTMAX_H__
#define __NNFW_CKER_SOFTMAX_H__

#include "cker/Shape.h"
#include "cker/Utils.h"
#include "cker/Types.h"
#include "cker/eigen/Utils.h"

#include <Eigen/Core>
#include <fixedpoint/fixedpoint.h>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>

namespace nnfw
{
namespace cker
{

namespace reference
{

// Note: this Softmax function supports any number of dimensions.
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
                    const Shape &output_shape, float *output_data)
{
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i)
  {
    // Find the max element value, which we'll use to ensure numerical stability
    // by taking advantage of the following equality:
    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
    float max = std::numeric_limits<float>::lowest();
    for (int c = 0; c < depth; ++c)
    {
      max = std::max(max, input_data[i * depth + c]);
    }

    // Compute the denominator: the sum of exp(beta * (x - max)) over the depth axis.
    float sum = 0.f;
    for (int c = 0; c < depth; ++c)
    {
      sum += std::exp((input_data[i * depth + c] - max) * static_cast<float>(params.beta));
    }

    // Compute the normalized outputs.
    for (int c = 0; c < depth; ++c)
    {
      output_data[i * depth + c] =
        std::exp((input_data[i * depth + c] - max) * static_cast<float>(params.beta)) / sum;
    }
  }
}

} // namespace reference

// Performs softmax along the input of size (input_size * batch_size).
inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
                    float *out)
{
  assert(input_size > 0);

  // For each batch.
  for (int b = 0; b < batch_size; b++)
  {
    // Find the max coeff.
    float max_coeff = in[0];
    for (int i = 1; i < input_size; i++)
    {
      if (in[i] > max_coeff)
      {
        max_coeff = in[i];
      }
    }

    // Compute the exp of the (max-shifted) inputs and accumulate their sum.
    float exp_sum = 0.f;
    for (int i = 0; i < input_size; i++)
    {
      out[i] = std::exp((in[i] - max_coeff) * beta);
      exp_sum += out[i];
    }

    // Divide by the sum of exps.
    const float reciprocal_sum_exp = 1.f / exp_sum;
    for (int i = 0; i < input_size; i++)
    {
      out[i] *= reciprocal_sum_exp;
    }

    // Advance in and out pointers for the next batch.
    in += input_size;
    out += input_size;
  }
}
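
// Illustrative usage of the overload above (a sketch, not part of the API):
// softmax over each row of a 2 x 3 buffer with beta = 1.
//
//   float in[6] = {1.f, 2.f, 3.f, 1.f, 1.f, 1.f};
//   float out[6];
//   nnfw::cker::Softmax(in, /*input_size=*/3, /*batch_size=*/2, /*beta=*/1.f, out);
//   // Each group of 3 outputs is now a probability distribution summing to 1.
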
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
                    const Shape &output_shape, float *output_data)
{
  // Validate that the shapes of the input and output are the same.
  MatchingFlatSize(input_shape, output_shape);

  const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
  auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
  // Compute the exponential first, removing the max coefficient for numerical
  // stability.
  out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta;
  // We are separating out the exp function so that exp can be vectorized.
  out_mat = out_mat.array().exp();
  // Normalize to get the activations.
  Eigen::Array<float, 1, Eigen::Dynamic> scale = out_mat.array().colwise().sum().inverse();
  out_mat.array().rowwise() *= scale;
}
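
// Note on the overload above: MapAsMatrixWithLastDimAsRows lays each softmax
// instance (the tensor's last dimension) out as one column of the Eigen
// matrix, which is why the max and sum are taken colwise() and the scale is
// broadcast back rowwise().
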
inline void Softmax(const SoftmaxParams &params, const Shape &input_shape,
                    const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
{
  const int32_t input_beta_multiplier = params.input_multiplier;
  const int32_t input_beta_left_shift = params.input_left_shift;
  const int diff_min = params.diff_min;
  // The representation chosen for the input to the exp() function is Q5.26.
  // We need to leave extra space since values that we skip might be as large
  // as -32 before multiplying by input_beta_multiplier, and therefore as
  // large as -16 afterwards. Note that exp(-8) is definitely not insignificant
  // to accumulation, but exp(-16) definitely is.
  static const int kScaledDiffIntegerBits = 5;
  static const int kAccumulationIntegerBits = 12;
  using FixedPointScaledDiff = gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
  using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;

  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
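
  // The tensor is processed as a flattened [outer_size, depth] array: one
  // independent softmax is computed over each depth-sized row.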
  for (int i = 0; i < outer_size; ++i)
  {
    uint8_t max_in_row = 0;
    for (int c = 0; c < depth; ++c)
    {
      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
    }

    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
    for (int c = 0; c < depth; ++c)
    {
      int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
      if (input_diff >= diff_min)
      {
        const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
          input_diff, input_beta_multiplier, input_beta_left_shift);
        const FixedPointScaledDiff scaled_diff_f8 =
          FixedPointScaledDiff::FromRaw(input_diff_rescaled);
        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
                        exp_on_negative_values(scaled_diff_f8));
      }
    }
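
    // sum_of_exps now holds the softmax denominator in Q12.19 fixed point.
    // Each exp term lies in (0, 1], so the 12 integer bits leave headroom to
    // accumulate up to a few thousand terms along the depth axis.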
    int32_t fixed_sum_of_exps = sum_of_exps.raw();
    int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(fixed_sum_of_exps));
    // This is the number of bits to the left of the binary point above 1.0.
    // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8, and
    // no later adjustment will be needed.
    int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one;
    int32_t shifted_sum_minus_one =
      static_cast<int32_t>((static_cast<uint32_t>(fixed_sum_of_exps) << headroom_plus_one) -
                           (static_cast<uint32_t>(1) << 31));

    FixedPoint0 shifted_scale =
      one_over_one_plus_x_for_x_in_0_1(FixedPoint0::FromRaw(shifted_sum_minus_one));
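
    // In other words: the sum was normalized into [1, 2) by shifting out its
    // headroom and rewritten as 1 + x with x in [0, 1) (Q0.31), so
    // one_over_one_plus_x_for_x_in_0_1 yields 1/sum up to a power of two; that
    // power of two is reapplied through num_bits_over_unit below.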

    for (int c = 0; c < depth; ++c)
    {
      int32_t input_diff = static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
      if (input_diff >= diff_min)
      {
        const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne(
          input_diff, input_beta_multiplier, input_beta_left_shift);
        const FixedPointScaledDiff scaled_diff_f8 =
          FixedPointScaledDiff::FromRaw(input_diff_rescaled);

        FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
        int32_t unsat_output = gemmlowp::RoundingDivideByPOT((shifted_scale * exp_in_0).raw(),
                                                             num_bits_over_unit + 31 - 8);
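        // Note: the product above (shifted_scale * exp_in_0) is Q0.31;
        // dividing its raw value by 2^(num_bits_over_unit + 31 - 8) reapplies
        // the normalization shift and rescales to the 8-bit output range.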

        output_data[i * depth + c] = static_cast<uint8_t>(
          std::max(std::min(unsat_output, static_cast<int32_t>(255)), static_cast<int32_t>(0)));
      }
      else
      {
        // Inputs more than diff_min below the row max contribute negligibly
        // to the sum of exps, so their probability is flushed to zero.
        output_data[i * depth + c] = 0;
      }
    }
  }
}

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_SOFTMAX_H__