src/core/CL/cl_kernels/fixed_point.h

   1 /*
   2  * Copyright (c) 2017-2018 ARM Limited.
   3  *
   4  * SPDX-License-Identifier: MIT
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to
   8  * deal in the Software without restriction, including without limitation the
   9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10  * sell copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #ifndef ARM_COMPUTE_FIXED_POINT_H
  25 #define ARM_COMPUTE_FIXED_POINT_H
  26 /* TYPE_ALIAS(type, alias): typedefs `alias` and `alias`x1 to `type`, and `alias`x2/x3/x4/x8/x16 to the vector types `type`2/3/4/8/16. */
  27 #define TYPE_ALIAS(type, alias)  \
  28     typedef type alias;          \
  29     typedef type alias##x##1;    \
  30     typedef type##2 alias##x##2; \
  31     typedef type##3 alias##x##3; \
  32     typedef type##4 alias##x##4; \
  33     typedef type##8 alias##x##8; \
  34     typedef type##16 alias##x##16;
  35 /* Fixed point aliases qs8, qs16 and qs32 (plus their xN variants), built on char, short and int respectively. */
  36 TYPE_ALIAS(char, qs8)
  37 TYPE_ALIAS(short, qs16)
  38 TYPE_ALIAS(int, qs32)
  39 /* Value range of each signed alias, cast to its underlying type. CHAR_MIN, SHRT_MAX, etc. are not defined in this header (presumably the built-in limit macros - TODO confirm). */
  40 #define qs8_MIN ((char)CHAR_MIN)
  41 #define qs8_MAX ((char)CHAR_MAX)
  42 #define qs16_MIN ((short)SHRT_MIN)
  43 #define qs16_MAX ((short)SHRT_MAX)
  44 #define qs32_MIN ((int)INT_MIN)
  45 #define qs32_MAX ((int)INT_MAX)
  46 /* Value range of the unsigned counterparts (uchar, ushort, uint); this header declares no matching qu8/qu16/qu32 typedefs. */
  47 #define qu8_MIN ((uchar)0)
  48 #define qu8_MAX ((uchar)UCHAR_MAX)
  49 #define qu16_MIN ((ushort)0)
  50 #define qu16_MAX ((ushort)USHRT_MAX)
  51 #define qu32_MIN ((uint)0)
  52 #define qu32_MAX ((uint)UINT_MAX)
  53 /* <alias>_TYPE names the underlying type of each alias; CONVERT() and CONVERT_SAT() paste it into convert_<type>() / convert_<type>_sat(). */
  54 #define qs8_TYPE char
  55 #define qs8x1_TYPE char
  56 #define qs8x2_TYPE char2
  57 #define qs8x3_TYPE char3
  58 #define qs8x4_TYPE char4
  59 #define qs8x8_TYPE char8
  60 #define qs8x16_TYPE char16
  61 /* Underlying types of the qs16 aliases. */
  62 #define qs16_TYPE short
  63 #define qs16x1_TYPE short
  64 #define qs16x2_TYPE short2
  65 #define qs16x3_TYPE short3
  66 #define qs16x4_TYPE short4
  67 #define qs16x8_TYPE short8
  68 #define qs16x16_TYPE short16
  69 /* Underlying types of the qs32 aliases. */
  70 #define qs32_TYPE int
  71 #define qs32x1_TYPE int
  72 #define qs32x2_TYPE int2
  73 #define qs32x3_TYPE int3
  74 #define qs32x4_TYPE int4
  75 #define qs32x8_TYPE int8
  76 #define qs32x16_TYPE int16
  77
  78 /* All internal constants are represented in the maximum supported fixed point format (QS16).
  79  * <alias>_SHIFT is the additional shift required to convert such a constant from the
  80  * maximum supported format to the required one: 8 bits for qs8, none for qs16.
  81  */
  82 #define qs8_SHIFT 8
  83 #define qs16_SHIFT 0
  84 /* Remove any earlier definitions of these helpers so they can be redefined below (presumably they come from another kernel header - TODO confirm). */
  85 #undef VEC_DATA_TYPE_STR
  86 #undef VEC_DATA_TYPE
  87 #undef CONVERT_STR
  88 #undef CONVERT
  89 #undef CONVERT_SAT_STR
  90 #undef CONVERT_SAT
  91 /* VEC_DATA_TYPE(type, size) yields the alias <type>x<size>, e.g. qs8x16; the _STR level lets the arguments expand before they are pasted. */
  92 #define VEC_DATA_TYPE_STR(type, size) type##x##size
  93 #define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
  94 /* CONVERT(x, type) yields (convert_<T>((x))), where T is the expansion of <type>_TYPE; the `type` argument of CONVERT_STR3 is unused. */
  95 #define CONVERT_STR3(x, type, rtype) (convert_##rtype((x)))
  96 #define CONVERT_STR2(x, type, rtype) CONVERT_STR3(x, type, rtype)
  97 #define CONVERT_STR(x, type) CONVERT_STR2(x, type, type##_TYPE)
  98 #define CONVERT(x, type) CONVERT_STR(x, type)
  99 /* CONVERT_SAT(x, type) is the saturating counterpart: it yields (convert_<T>_sat((x))) with the same T. */
 100 #define CONVERT_SAT_STR3(x, type, rtype) (convert_##rtype##_sat((x)))
 101 #define CONVERT_SAT_STR2(x, type, rtype) CONVERT_SAT_STR3(x, type, rtype)
 102 #define CONVERT_SAT_STR(x, type) CONVERT_SAT_STR2(x, type, type##_TYPE)
 103 #define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
 104
 105 /** Generates abs_<type>_sat(), the saturating absolute value of a fixed point vector.
 106  *
 107  * @param[in] type the actual data type; instantiated below for qs8x16 and qs16x8.
 108  *
 109  * @return abs(VopA) converted back to @p type through CONVERT_SAT.
 110  */
 111 #define ABSQ_SAT_IMPL(type)                  \
 112     inline type abs_##type##_sat(type VopA)  \
 113     {                                        \
 114         return CONVERT_SAT(abs(VopA), type); \
 115     }
 116
 117 ABSQ_SAT_IMPL(qs8x16)
 118 ABSQ_SAT_IMPL(qs16x8)
 119 /* ABS_SAT_OP_EXPAND(a, type, size) calls abs_<type>x<size>_sat(a); the _STR level lets the arguments expand before they are pasted. */
 120 #define ABS_SAT_OP_EXPAND_STR(a, type, size) abs_##type##x##size##_sat((a))
 121 #define ABS_SAT_OP_EXPAND(a, type, size) ABS_SAT_OP_EXPAND_STR(a, type, size)
 122
 123 /** Generates max_<type>(), the maximum of two fixed point values.
 124  *
 125  * @param[in] type the actual data type; instantiated below for the x1, x2, x4, x8 and x16 widths of qs8 and qs16.
 126  *
 127  * @return max(VopA, VopB).
 128  */
 129 #define MAXQ_IMPL(type)                          \
 130     inline type max_##type(type VopA, type VopB) \
 131     {                                            \
 132         return max(VopA, VopB);                  \
 133     }
 134
 135 MAXQ_IMPL(qs8x1)
 136 MAXQ_IMPL(qs8x2)
 137 MAXQ_IMPL(qs8x4)
 138 MAXQ_IMPL(qs8x8)
 139 MAXQ_IMPL(qs8x16)
 140 MAXQ_IMPL(qs16x1)
 141 MAXQ_IMPL(qs16x2)
 142 MAXQ_IMPL(qs16x4)
 143 MAXQ_IMPL(qs16x8)
 144 MAXQ_IMPL(qs16x16)
 145 /* MAX_OP_EXPAND(a, b, type, size) calls max_<type>x<size>(a, b); the _STR level lets the arguments expand before they are pasted. */
 146 #define MAX_OP_EXPAND_STR(a, b, type, size) max_##type##x##size((a), (b))
 147 #define MAX_OP_EXPAND(a, b, type, size) MAX_OP_EXPAND_STR(a, b, type, size)
 148
 149 /** Generates add_sat_<type>(), the saturated addition of two fixed point values.
 150  *
 151  * @param[in] type the actual data type; instantiated below for the x1, x2, x4, x8 and x16 widths of qs8, qs16 and qs32.
 152  *
 153  * @return add_sat(VopA, VopB). The result is saturated in case of overflow
 154  */
 155 #define ADDQ_SAT_IMPL(type)                          \
 156     inline type add_sat_##type(type VopA, type VopB) \
 157     {                                                \
 158         return add_sat(VopA, VopB);                  \
 159     }
 160
 161 ADDQ_SAT_IMPL(qs8x1)
 162 ADDQ_SAT_IMPL(qs8x2)
 163 ADDQ_SAT_IMPL(qs8x4)
 164 ADDQ_SAT_IMPL(qs8x8)
 165 ADDQ_SAT_IMPL(qs8x16)
 166 ADDQ_SAT_IMPL(qs16x1)
 167 ADDQ_SAT_IMPL(qs16x2)
 168 ADDQ_SAT_IMPL(qs16x4)
 169 ADDQ_SAT_IMPL(qs16x8)
 170 ADDQ_SAT_IMPL(qs16x16)
 171 ADDQ_SAT_IMPL(qs32x1)
 172 ADDQ_SAT_IMPL(qs32x2)
 173 ADDQ_SAT_IMPL(qs32x4)
 174 ADDQ_SAT_IMPL(qs32x8)
 175 ADDQ_SAT_IMPL(qs32x16)
 176 /* ADD_SAT_OP_EXPAND(a, b, type, size) calls add_sat_<type>x<size>(a, b); the _STR level lets the arguments expand before they are pasted. */
 177 #define ADD_SAT_OP_EXPAND_STR(a, b, type, size) add_sat_##type##x##size((a), (b))
 178 #define ADD_SAT_OP_EXPAND(a, b, type, size) ADD_SAT_OP_EXPAND_STR(a, b, type, size)
 179
 180 /** Generates sub_sat_<type>(), the saturated subtraction of two fixed point values.
 181  *
 182  * @param[in] type the actual data type; instantiated below for the x1, x2, x4, x8 and x16 widths of qs8 and qs16 (no qs32 variants).
 183  *
 184  * @return sub_sat(VopA, VopB). The result is saturated in case of overflow
 185  */
 186 #define SUBQ_SAT_IMPL(type)                          \
 187     inline type sub_sat_##type(type VopA, type VopB) \
 188     {                                                \
 189         return sub_sat(VopA, VopB);                  \
 190     }
 191
 192 SUBQ_SAT_IMPL(qs8x1)
 193 SUBQ_SAT_IMPL(qs8x2)
 194 SUBQ_SAT_IMPL(qs8x4)
 195 SUBQ_SAT_IMPL(qs8x8)
 196 SUBQ_SAT_IMPL(qs8x16)
 197 SUBQ_SAT_IMPL(qs16x1)
 198 SUBQ_SAT_IMPL(qs16x2)
 199 SUBQ_SAT_IMPL(qs16x4)
 200 SUBQ_SAT_IMPL(qs16x8)
 201 SUBQ_SAT_IMPL(qs16x16)
 202 /* SUB_SAT_OP_EXPAND(a, b, type, size) calls sub_sat_<type>x<size>(a, b); the _STR level lets the arguments expand before they are pasted. */
 203 #define SUB_SAT_OP_EXPAND_STR(a, b, type, size) sub_sat_##type##x##size((a), (b))
 204 #define SUB_SAT_OP_EXPAND(a, b, type, size) SUB_SAT_OP_EXPAND_STR(a, b, type, size)
 205
 206 /** Generates mul_<type>(), the rounding, non-saturating multiply of two fixed point numbers.
 207  *
 208  * @param[in] type  the actual data type.
 209  * @param[in] itype the intermediate data type in which the product is computed (the next wider alias in the instantiations below).
 210  * NOTE(review): the rounding term is 1 << (fixed_point_position - 1), so fixed_point_position is presumably expected to be >= 1 - confirm with callers.
 211  * @return (VopA * VopB + rounding term) >> fixed_point_position, converted to @p type with CONVERT (no saturation).
 212  */
 213 #define MULQ_IMPL(type, itype)                                                         \
 214     inline type mul_##type(type VopA, type VopB, int fixed_point_position)             \
 215     {                                                                                  \
 216         itype round_val = (itype)(1 << (fixed_point_position - 1));                    \
 217         itype res       = CONVERT((VopA), itype) * CONVERT((VopB), itype) + round_val; \
 218         return CONVERT((res >> (itype)fixed_point_position), type);                    \
 219     }
 220
 221 MULQ_IMPL(qs8x8, qs16x8)
 222 MULQ_IMPL(qs16x8, qs32x8)
 223 MULQ_IMPL(qs8x16, qs16x16)
 224 MULQ_IMPL(qs16x16, qs32x16)
 225 /* MUL_OP_EXPAND(a, b, type, size, position) calls mul_<type>x<size>(a, b, position); the _STR level lets the arguments expand before they are pasted. */
 226 #define MUL_OP_EXPAND_STR(a, b, type, size, position) mul_##type##x##size((a), (b), (position))
 227 #define MUL_OP_EXPAND(a, b, type, size, position) MUL_OP_EXPAND_STR(a, b, type, size, position)
 228
 229 /** Generates mul_sat_<type>(), the rounding, saturating multiply of two fixed point numbers.
 230  *
 231  * @param[in] type  the actual data type.
 232  * @param[in] itype the intermediate data type in which the product is computed (the next wider alias in the instantiations below).
 233  * NOTE(review): the rounding term is 1 << (fixed_point_position - 1), so fixed_point_position is presumably expected to be >= 1 - confirm with callers.
 234  * @return mad_sat(VopA, VopB, rounding term) >> fixed_point_position, converted to @p type with CONVERT_SAT. The result is saturated in case of overflow
 235  */
 236 #define MULQ_SAT_IMPL(type, itype)                                                            \
 237     inline type mul_sat_##type(type VopA, type VopB, int fixed_point_position)                \
 238     {                                                                                         \
 239         itype round_val = (itype)(1 << (fixed_point_position - 1));                           \
 240         itype res       = mad_sat(CONVERT((VopA), itype), CONVERT((VopB), itype), round_val); \
 241         return CONVERT_SAT((res >> (itype)fixed_point_position), type);                       \
 242     }
 243
 244 MULQ_SAT_IMPL(qs8x1, qs16x1)
 245 MULQ_SAT_IMPL(qs8x2, qs16x2)
 246 MULQ_SAT_IMPL(qs8x3, qs16x3)
 247 MULQ_SAT_IMPL(qs8x4, qs16x4)
 248 MULQ_SAT_IMPL(qs8x8, qs16x8)
 249 MULQ_SAT_IMPL(qs8x16, qs16x16)
 250 MULQ_SAT_IMPL(qs16x1, qs32x1)
 251 MULQ_SAT_IMPL(qs16x2, qs32x2)
 252 MULQ_SAT_IMPL(qs16x3, qs32x3)
 253 MULQ_SAT_IMPL(qs16x4, qs32x4)
 254 MULQ_SAT_IMPL(qs16x8, qs32x8)
 255 MULQ_SAT_IMPL(qs16x16, qs32x16)
 256 /* MUL_SAT_OP_EXPAND(a, b, type, size, position) calls mul_sat_<type>x<size>(a, b, position); the _STR level lets the arguments expand before they are pasted. */
 257 #define MUL_SAT_OP_EXPAND_STR(a, b, type, size, position) mul_sat_##type##x##size((a), (b), (position))
 258 #define MUL_SAT_OP_EXPAND(a, b, type, size, position) MUL_SAT_OP_EXPAND_STR(a, b, type, size, position)
 259
 260 /** Generates mla_sat_<type>(), the saturating fixed point multiply-accumulate VopA + (VopB * VopC).
 261  * The generated function is declared inline, like every other generated helper in this header.
 262  * @param[in] type  the actual data type.
 263  * @param[in] itype the intermediate data type in which the product is computed.
 264  *
 265  * @return add_sat(VopA, r), where r is the rounded product shifted right by fixed_point_position and converted to @p type with CONVERT_SAT. The result is saturated in case of overflow
 266  */
 267 #define MLAQ_SAT_IMPL(type, itype)                                                                                 \
 268     inline type mla_sat_##type(type VopA, type VopB, type VopC, int fixed_point_position)                          \
 269     {                                                                                                              \
 270         itype res = mad_sat(CONVERT(VopB, itype), CONVERT(VopC, itype), (itype)(1 << (fixed_point_position - 1))); \
 271         return add_sat(VopA, CONVERT_SAT(res >> (itype)fixed_point_position, type));                               \
 272     }
 273
 274 MLAQ_SAT_IMPL(qs8x8, qs16x8)
 275 MLAQ_SAT_IMPL(qs8x16, qs16x16)
 276 MLAQ_SAT_IMPL(qs16x8, qs32x8)
 277 /* MLA_SAT_OP_EXPAND(a, b, c, type, size, position) calls mla_sat_<type>x<size>(a, b, c, position); the _STR level lets the arguments expand before they are pasted. */
 278 #define MLA_SAT_OP_EXPAND_STR(a, b, c, type, size, position) mla_sat_##type##x##size((a), (b), (c), (position))
 279 #define MLA_SAT_OP_EXPAND(a, b, c, type, size, position) MLA_SAT_OP_EXPAND_STR(a, b, c, type, size, position)
 280
 281 /** Generates mlal_sat_<type>(), the saturating fixed point multiply-accumulate long: the accumulator VopA and the result use @p itype.
 282  * The generated function is declared inline, like every other generated helper in this header.
 283  * @param[in] type  the actual data type of the two factors VopB and VopC.
 284  * @param[in] itype the intermediate data type, used for the product, the accumulator and the return value.
 285  *
 286  * @return add_sat(VopA, r), where r is the rounded product shifted right by fixed_point_position, kept in @p itype. The result is saturated in case of overflow
 287  */
 288 #define MLALQ_SAT_IMPL(type, itype)                                                                                \
 289     inline itype mlal_sat_##type(itype VopA, type VopB, type VopC, int fixed_point_position)                       \
 290     {                                                                                                              \
 291         itype res = mad_sat(CONVERT(VopB, itype), CONVERT(VopC, itype), (itype)(1 << (fixed_point_position - 1))); \
 292         return add_sat(VopA, res >> (itype)fixed_point_position);                                                  \
 293     }
 294
 295 MLALQ_SAT_IMPL(qs8x8, qs16x8)
 296 MLALQ_SAT_IMPL(qs16x8, qs32x8)
 297 /* MLAL_SAT_OP_EXPAND(a, b, c, type, size, position) calls mlal_sat_<type>x<size>(a, b, c, position); the _STR level lets the arguments expand before they are pasted. */
 298 #define MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position) mlal_sat_##type##x##size((a), (b), (c), (position))
 299 #define MLAL_SAT_OP_EXPAND(a, b, c, type, size, position) MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position)
 300
 301 /** Generates div_sat_<type>(), the saturating division of two fixed point values (vector and scalar aliases are instantiated below).
 302  * The numerator is widened to @p itype and shifted left by fixed_point_position before the integer division.
 303  * @param[in] stype the actual scalar data type; supplies the stype##_MIN / stype##_MAX limits.
 304  * @param[in] type  the actual data type.
 305  * @param[in] itype the intermediate data type.
 306  * Where VopB is zero the quotient is discarded: the result is stype##_MIN if VopA is negative and stype##_MAX otherwise.
 307  * @return The result of the fixed point division, converted to @p type with CONVERT_SAT. The result is saturated in case of overflow
 308  */
 309 #define DIVQ_SAT_IMPL(stype, type, itype)                                                                                                                                           \
 310     inline type div_sat_##type(type VopA, type VopB, int fixed_point_position)                                                                                                      \
 311     {                                                                                                                                                                               \
 312         itype conv_a      = CONVERT((VopA), itype);                                                                                                                                 \
 313         itype denominator = CONVERT((VopB), itype);                                                                                                                                 \
 314         itype numerator   = conv_a << (itype)(fixed_point_position);                                                                                                                \
 315         itype res         = select((itype)(numerator / denominator), select((itype)stype##_MAX, (itype)stype##_MIN, (itype)(conv_a < (itype)0)), (itype)(denominator == (itype)0)); \
 316         return CONVERT_SAT((res), type);                                                                                                                                            \
 317     }
 318
 319 DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16)
 320 DIVQ_SAT_IMPL(qs16, qs16x8, qs32x8)
 321 DIVQ_SAT_IMPL(qs16, qs16x16, qs32x16)
 322 DIVQ_SAT_IMPL(qs8, qs8, qs16)
 323 DIVQ_SAT_IMPL(qs16, qs16, qs32)
 324 /* DIV_SAT_OP_EXPAND(a, b, type, position) calls div_sat_<type>(a, b, position), i.e. it takes the full type name. */
 325 #define DIV_SAT_OP_EXPAND_STR(a, b, type, position) div_sat_##type((a), (b), (position))
 326 #define DIV_SAT_OP_EXPAND(a, b, type, position) DIV_SAT_OP_EXPAND_STR(a, b, type, position)
 327 /* DIV_SAT_OP_VEC_EXPAND(a, b, type, size, position) calls div_sat_<type>x<size>(a, b, position). */
 328 #define DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position))
 329 #define DIV_SAT_OP_VEC_EXPAND(a, b, type, size, position) DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position)
 330
 331 /** Generates exp_sat_<type>(), the saturating exponential of a fixed point vector.
 332  *
 333  * @note m = VopA * inv_ln2 and dec_m = m >> fixed_point_position; alpha is the absolute difference between VopA and dec_m * ln2.
 334  *       A polynomial in alpha with coefficients D, C, B, A and const_one is evaluated in nested form, then shifted by dec_m bits (left if dec_m >= 0, right otherwise).
 335  * @param[in] stype the actual scalar data type.
 336  * @param[in] type  the actual data type.
 337  * @param[in] size  the number of the calculated elements.
 338  *
 339  * @return The result of the fixed point exponential. Elements where clz(sum) > dec_m does not hold are replaced by stype##_MAX, i.e. saturated
 340  */
 341 #define EXPQ_IMPL(stype, type, size)                                                                                                              \
 342     inline type exp_sat_##type(type VopA, int fixed_point_position)                                                                               \
 343     {                                                                                                                                             \
 344         type const_one = (type)(1 << (fixed_point_position));                                                                                     \
 345         type ln2       = (type)((((0x58B9 >> (14 - fixed_point_position))) + 1) >> 1);                                                            \
 346         type inv_ln2   = (type)((((0x38AA >> (14 - fixed_point_position)) + 1) >> 1)) | const_one;                                                \
 347         type A         = (type)(((0x7FBA >> (14 - fixed_point_position)) + 1) >> 1);                                                              \
 348         type B         = (type)(((0x3FE9 >> (14 - fixed_point_position)) + 1) >> 1);                                                              \
 349         type C         = (type)(((0x1693 >> (14 - fixed_point_position)) + 1) >> 1);                                                              \
 350         type D         = (type)(((0x0592 >> (14 - fixed_point_position)) + 1) >> 1);                                                              \
 351         type m         = MUL_SAT_OP_EXPAND(VopA, inv_ln2, stype, size, fixed_point_position);                                                     \
 352         type dec_m     = m >> (type)fixed_point_position;                                                                                         \
 353         type alpha     = MUL_SAT_OP_EXPAND(dec_m << (type)fixed_point_position, ln2, stype, size, fixed_point_position);                          \
 354         alpha          = CONVERT(abs_diff(VopA, alpha), type);                                                                                    \
 355         type sum       = add_sat(MUL_SAT_OP_EXPAND(alpha, D, stype, size, fixed_point_position), C);                                              \
 356         sum            = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), B);                                            \
 357         sum            = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), A);                                            \
 358         sum            = add_sat(MUL_SAT_OP_EXPAND(alpha, sum, stype, size, fixed_point_position), const_one);                                    \
 359         return select((type)stype##_MAX, select(sum << dec_m, sum >> -dec_m, dec_m < (type)0), clz(sum) > dec_m); /* Saturate result if needed */ \
 360     }
 361
 362 EXPQ_IMPL(qs8, qs8x2, 2)
 363 EXPQ_IMPL(qs8, qs8x4, 4)
 364 EXPQ_IMPL(qs8, qs8x8, 8)
 365 EXPQ_IMPL(qs8, qs8x16, 16)
 366 EXPQ_IMPL(qs16, qs16x2, 2)
 367 EXPQ_IMPL(qs16, qs16x4, 4)
 368 EXPQ_IMPL(qs16, qs16x8, 8)
 369 EXPQ_IMPL(qs16, qs16x16, 16)
 370 /* EXP_OP_EXPAND(a, type, size, position) calls exp_sat_<type>x<size>(a, position); the _STR level lets the arguments expand before they are pasted. */
 371 #define EXP_OP_EXPAND_STR(a, type, size, position) exp_sat_##type##x##size((a), (position))
 372 #define EXP_OP_EXPAND(a, type, size, position) EXP_OP_EXPAND_STR(a, type, size, position)
 373
 374 /** Generates log_sat_<type>(), the logarithm of a fixed point vector.
 375  *
 376  * @note inter_a is VopA, or const_one / VopA where VopA < const_one. It is shifted right by shift_val (derived from the leading-zero count of its integer part;
 377  *       presumably this normalises it into [1, 2) - confirm), const_one is subtracted, and a polynomial with coefficients D, C, B, A is evaluated in nested form. shift_val is added back and the total is multiplied by ln2.
 378  * @param[in] stype the actual scalar data type.
 379  * @param[in] type  the actual data type.
 380  * @param[in] size  the number of the calculated elements.
 381  *
 382  * @return The result of the fixed point logarithm: negated where VopA < const_one, and 0 where VopA is negative. Intermediate steps use saturating arithmetic
 383  */
 384 #define LOGQ_IMPL(stype, type, size)                                                                                                       \
 385     inline type log_sat_##type(type VopA, int fixed_point_position)                                                                        \
 386     {                                                                                                                                      \
 387         type const_one = (type)(1 << (fixed_point_position));                                                                              \
 388         type ln2       = (type)(0x58B9 >> (15 - fixed_point_position));  /* 0.6931472 */                                                   \
 389         type A         = (type)(0x5C0F >> (14 - fixed_point_position));  /* 1.4384189 */                                                   \
 390         type B         = -(type)(0x56AE >> (15 - fixed_point_position)); /* -0.6771900 */                                                  \
 391         type C         = (type)(0x2933 >> (15 - fixed_point_position));  /* 0.3218538 */                                                   \
 392         type D         = -(type)(0x0AA7 >> (15 - fixed_point_position)); /* -0.0832229 */                                                  \
 393         type inter_a   = select(VopA, DIV_SAT_OP_VEC_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one);        \
 394         type shift_val = (type)(15 - stype##_SHIFT) - clz(inter_a >> (type)fixed_point_position);                                          \
 395         inter_a        = inter_a >> shift_val;                                                                                             \
 396         inter_a        = sub_sat(inter_a, const_one);                                                                                      \
 397         type sum       = add_sat(MUL_SAT_OP_EXPAND(inter_a, D, stype, size, fixed_point_position), C);                                     \
 398         sum            = add_sat(MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position), B);                                   \
 399         sum            = add_sat(MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position), A);                                   \
 400         sum            = MUL_SAT_OP_EXPAND(inter_a, sum, stype, size, fixed_point_position);                                               \
 401         sum            = MUL_SAT_OP_EXPAND(add_sat(sum, shift_val << (type)fixed_point_position), ln2, stype, size, fixed_point_position); \
 402         return select(select(sum, -sum, VopA < const_one), (type)0, VopA < (type)0); /* -sum if VopA < 1, 0 if VopA < 0 */                 \
 403     }
 404
 405 LOGQ_IMPL(qs8, qs8x16, 16)
 406 LOGQ_IMPL(qs16, qs16x8, 8)
 407 LOGQ_IMPL(qs16, qs16x16, 16)
 408 /* LOG_OP_EXPAND(a, type, size, position) calls log_sat_<type>x<size>(a, position); the _STR level lets the arguments expand before they are pasted. */
 409 #define LOG_OP_EXPAND_STR(a, type, size, position) log_sat_##type##x##size((a), (position))
 410 #define LOG_OP_EXPAND(a, type, size, position) LOG_OP_EXPAND_STR(a, type, size, position)
 411
 412 /** Generates invsqrt_sat_<type>(), the saturating inverse square root of a fixed point value (scalar x1 and vector aliases are instantiated below).
 413  *
 414  * @note Implemented approach uses Newton's method: temp is VopA shifted by shift_value (derived from clz(VopA) and fixed_point_position), and
 415  *       x = (x * (3 - x * x * temp)) >> 1 is applied three times, plus two more times when stype is wider than one byte. x is finally shifted by half of |shift_value|.
 416  * @param[in] stype the actual scalar data type.
 417  * @param[in] type  the actual data type.
 418  * @param[in] size  the number of the calculated elements.
 419  *
 420  * @return The result of the fixed point inverse square root. Left shifts that would not fit (per the clz checks) yield stype##_MAX, i.e. the result is saturated
 421  */
 422 #define INVSQRTQ_IMPL(stype, type, size)                                                                                                                                                                                               \
 423     inline type invsqrt_sat_##type(type VopA, int fixed_point_position)                                                                                                                                                                \
 424     {                                                                                                                                                                                                                                  \
 425         type const_three = (type)(3 << (fixed_point_position));                                                                                                                                                                        \
 426         type shift_value = (type)(16 - stype##_SHIFT) - (clz(VopA) + (type)fixed_point_position);                                                                                                                                      \
 427         type temp        = select((type)(VopA >> shift_value), select((type)stype##_MAX, (type)(VopA << (-shift_value)), (type)(clz(VopA) > (-shift_value))), (type)(shift_value < (type)0));                                          \
 428         type x           = temp;                                                                                                                                                                                                       \
 429         x                = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
 430         x                = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
 431         x                = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1; \
 432         if(sizeof((stype)(1)) > 1) /* Perform more iterations if datatype is QS16 */                                                                                                                                                   \
 433         {                                                                                                                                                                                                                              \
 434             x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1;            \
 435             x = MUL_SAT_OP_EXPAND(x, sub_sat(const_three, MUL_SAT_OP_EXPAND(MUL_SAT_OP_EXPAND(x, x, stype, size, fixed_point_position), temp, stype, size, fixed_point_position)), stype, size, fixed_point_position) >> 1;            \
 436         }                                                                                                                                                                                                                              \
 437         type shift_value2 = select(shift_value >> 1, (-shift_value) >> 1, shift_value < (type)0);                                                                                                                                      \
 438         return select((type)(x >> shift_value2), select((type)stype##_MAX, (type)(x << shift_value2), (type)(clz(x) > shift_value2)), (type)(shift_value < (type)0)); /* Saturate result if needed */                                  \
 439     }
 440
 441 INVSQRTQ_IMPL(qs8, qs8x1, 1)
 442 INVSQRTQ_IMPL(qs16, qs16x1, 1)
 443 INVSQRTQ_IMPL(qs8, qs8x16, 16)
 444 INVSQRTQ_IMPL(qs16, qs16x8, 8)
 445 /* INVSQRT_OP_EXPAND(a, type, size, position) calls invsqrt_sat_<type>x<size>(a, position); the _STR level lets the arguments expand before they are pasted. */
 446 #define INVSQRT_OP_EXPAND_STR(a, type, size, position) invsqrt_sat_##type##x##size((a), (position))
 447 #define INVSQRT_OP_EXPAND(a, type, size, position) INVSQRT_OP_EXPAND_STR(a, type, size, position)
 448
 449 /** Generates tanh_sat_<type>(), the saturating hyperbolic tangent of a fixed point vector.
 450  *
 451  * tanh(x) = (e^(2x) - 1) / (e^(2x) + 1), built from the mul_sat, exp_sat, sub_sat, add_sat and div_sat helpers of the same alias.
 452  *
 453  * @param[in] stype the actual scalar data type.
 454  * @param[in] type  the actual data type.
 455  * @param[in] size  the number of the calculated elements.
 456  *
 457  * @return The result of the fixed point hyperbolic tangent. The result is saturated in case of overflow
 458  */
 459 #define TANHQ_IMPL(stype, type, size)                                                                                                             \
 460     inline type tanh_sat_##type(type VopA, int fixed_point_position)                                                                              \
 461     {                                                                                                                                             \
 462         type const_one = (type)(1 << (fixed_point_position));                                                                                     \
 463         type const_two = (type)(2 << (fixed_point_position));                                                                                     \
 464         type exp2x     = EXP_OP_EXPAND(MUL_SAT_OP_EXPAND(const_two, VopA, stype, size, fixed_point_position), stype, size, fixed_point_position); \
 465         type num       = SUB_SAT_OP_EXPAND(exp2x, const_one, stype, size);                                                                        \
 466         type den       = ADD_SAT_OP_EXPAND(exp2x, const_one, stype, size);                                                                        \
 467         return DIV_SAT_OP_VEC_EXPAND(num, den, stype, size, fixed_point_position);                                                                \
 468     }
 469
 470 TANHQ_IMPL(qs8, qs8x16, 16)
 471 TANHQ_IMPL(qs16, qs16x8, 8)
 472 /* TANH_OP_EXPAND(a, type, size, position) calls tanh_sat_<type>x<size>(a, position); the _STR level lets the arguments expand before they are pasted. */
 473 #define TANH_OP_EXPAND_STR(a, type, size, position) tanh_sat_##type##x##size((a), (position))
 474 #define TANH_OP_EXPAND(a, type, size, position) TANH_OP_EXPAND_STR(a, type, size, position)
 475 /* floatx16 and float16_TYPE let VEC_DATA_TYPE(float, 16) and CONVERT(x, float16) resolve to float16 and convert_float16. */
 476 #define floatx16 float16
 477 #define float16_TYPE float16
 478 /* CONVERTQ_DOWN_IMPL generates convert_<out_type>_<in_type>(a, fixed_point_position): scales a by (1 << fixed_point_position), adds 0.5 where a > 0 and -0.5 elsewhere, then converts to out_type without saturation. */
 479 #define CONVERTQ_DOWN_IMPL(in_type, out_type)                                                                                        \
 480     inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position)                                              \
 481     {                                                                                                                                \
 482         return CONVERT(a * (1 << fixed_point_position) + select((in_type)-0.5f, (in_type)0.5f, isgreater(a, (in_type)0)), out_type); \
 483     }
 484
 485 CONVERTQ_DOWN_IMPL(float16, qs8x16)
 486 CONVERTQ_DOWN_IMPL(float16, qs16x16)
 487 /* CONVERTQ_DOWN_SAT_IMPL generates convert_<out_type>_<in_type>_sat(a, fixed_point_position): scales a by (1 << fixed_point_position), adds 0.5 where a > 0 and -0.5 elsewhere, then converts to out_type with CONVERT_SAT. */
 488 #define CONVERTQ_DOWN_SAT_IMPL(in_type, out_type)                                                                                        \
 489     inline out_type convert_##out_type##_##in_type##_sat(in_type a, int fixed_point_position)                                            \
 490     {                                                                                                                                    \
 491         return CONVERT_SAT(a * (1 << fixed_point_position) + select((in_type)-0.5f, (in_type)0.5f, isgreater(a, (in_type)0)), out_type); \
 492     }
 493
 494 CONVERTQ_DOWN_SAT_IMPL(float16, qs8x16)
 495 CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16)
 496 /* CONVERTQ_UP_IMPL generates convert_<out_type>_<in_type>(a, fixed_point_position): converts a to out_type and divides by (1 << fixed_point_position). Instantiated below for qs8x16 and qs16x16 to float16. */
 497 #define CONVERTQ_UP_IMPL(in_type, out_type)                                             \
 498     inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \
 499     {                                                                                   \
 500         return CONVERT(a, out_type) / (1 << fixed_point_position);                      \
 501     }
 502
 503 CONVERTQ_UP_IMPL(qs8x16, float16)
 504 CONVERTQ_UP_IMPL(qs16x16, float16)
 505 /* SQCVT_SAT_IMPL generates sqcvt_<type>_sat(a, fixed_point_position) for a scalar float: scales a by (1 << fixed_point_position), adds -0.5 when a < 0 and 0.5 otherwise, then converts to type with CONVERT_SAT. */
 506 #define SQCVT_SAT_IMPL(type)                                                                    \
 507     inline type sqcvt_##type##_sat(float a, int fixed_point_position)                           \
 508     {                                                                                           \
 509         return CONVERT_SAT((a * (1 << fixed_point_position) + ((a < 0) ? -0.5f : 0.5f)), type); \
 510     }
 511
 512 SQCVT_SAT_IMPL(qs8)
 513 SQCVT_SAT_IMPL(qs16)
 514 /* SQCVT_SAT_OP_EXPAND(a, type, position) calls sqcvt_<type>_sat(a, position); `a` ends up parenthesised twice (here and in _STR), which is harmless. */
 515 #define SQCVT_SAT_OP_EXPAND_STR(a, type, position) sqcvt_##type##_sat((a), (position))
 516 #define SQCVT_SAT_OP_EXPAND(a, type, position) SQCVT_SAT_OP_EXPAND_STR((a), type, position)
 517
 518 #endif // ARM_COMPUTE_FIXED_POINT_H