/*
 * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 #ifndef LUCI_INTERPRETER_PAL_TANH_H
19 #define LUCI_INTERPRETER_PAL_TANH_H
23 namespace luci_interpreter_pal
// Element-wise hyperbolic tangent for float data.
//
// flat_size   - number of elements to process; nothing is read or written
//               when it is zero or negative.
// input_data  - source buffer, read at indices [0, flat_size).
// output_data - destination buffer, written at indices [0, flat_size).
//
// Each element is read before its output slot is written, so the same buffer
// may be passed for input and output.
inline void Tanh(const int flat_size, const float *input_data, float *output_data)
{
  for (int i = 0; i < flat_size; i++)
  {
    output_data[i] = std::tanh(input_data[i]);
  }
}
36 inline void Tanh(int32_t input_multiplier, int32_t input_left_shift, const int flat_size,
37 const int16_t *ptr_input_data, int16_t *ptr_output_data)
39 // We use the LUT for sigmoid and take into account, that
40 // tanh(x) = 2*sigmoid(2*x) - 1
42 // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
43 // In case of general parameter scale, multiplier 3 is taken into account
44 // in TanhPrepare function and it is included in
45 // input_multiplier already.
47 if (input_multiplier == 0)
48 { // power of two case
49 input_multiplier = 3 << input_left_shift;
53 int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
55 for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++)
57 int32_t input_data = ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
59 uint32_t abs_input_data = abs(input_data);
60 uint32_t uh = abs_input_data >> 8;
65 // Saturate to maximum.
70 uint32_t ua = sigmoid_table_uint16[uh];
71 uint32_t ub = sigmoid_table_uint16[uh + 1];
73 uint8_t ut = abs_input_data & 0xFF;
75 result = (ua << 8) + ut * (ub - ua);
78 result = (input_data >= 0) ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
79 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
81 // Convert back to 16-bit.
84 *ptr_output_data = result;
89 inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
90 int32_t input_multiplier, int32_t input_shift,
91 const int flat_size, const int8_t* input_data, int8_t* output_data) {
92 // Integer bits must be in sync with Prepare() function.
93 static constexpr int32_t kInputIntegerBits = 4;
94 static constexpr int32_t kOutputScale = 7;
95 static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
96 static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
98 for (int i = 0; i < flat_size; ++i) {
100 static_cast<int32_t>(input_data[i]) - input_zero_point;
101 if (input <= -input_range_radius) {
102 output_data[i] = kMinInt8;
103 } else if (input >= input_range_radius) {
104 output_data[i] = kMaxInt8;
106 const int32_t input_in_q4 =
107 multiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
108 const int32_t output_in_q0 = std::tanh(input_in_q4);
110 int32_t output_in_q24 =
111 roundingDivideByPOT(output_in_q0, 31 - kOutputScale);
112 output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
113 output_data[i] = static_cast<int8_t>(output_in_q24);
119 } // namespace luci_interpreter_pal
121 #endif // LUCI_INTERPRETER_PAL_TANH_H