soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
  34 #define _FP_FRAC_COPY_2(D, S)   (D##_f0 = S##_f0, D##_f1 = S##_f1)
  35 #define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)
  36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  38 #define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
  39
  40 #define _FP_FRAC_SLL_2(X, N)                                            \
  41   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  42           ? ({                                                          \
  43               if (__builtin_constant_p (N) && (N) == 1)                 \
  44                 {                                                       \
  45                   X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
  46                   X##_f0 += X##_f0;                                     \
  47                 }                                                       \
  48               else                                                      \
  49                 {                                                       \
  50                   X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  51                   X##_f0 <<= (N);                                       \
  52                 }                                                       \
  53               0;                                                        \
  54             })                                                          \
  55           : ({                                                          \
  56               X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
  57               X##_f0 = 0;                                               \
  58             }))
  59
  60
  61 #define _FP_FRAC_SRL_2(X, N)                                            \
  62   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  63           ? ({                                                          \
  64               X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
  65               X##_f1 >>= (N);                                           \
  66             })                                                          \
  67           : ({                                                          \
  68               X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
  69               X##_f1 = 0;                                               \
  70             }))
  71
  72 /* Right shift with sticky-lsb.  */
  73 #define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
  74   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  75           ? ({                                                          \
  76               S = (__builtin_constant_p (N) && (N) == 1                 \
  77                    ? X##_f0 & 1                                         \
  78                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);         \
  79               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
  80               X##_f1 >>= (N);                                           \
  81             })                                                          \
  82           : ({                                                          \
  83               S = ((((N) == _FP_W_TYPE_SIZE                             \
  84                      ? 0                                                \
  85                      : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
  86                     | X##_f0) != 0);                                    \
  87               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
  88               X##_f1 = 0;                                               \
  89             }))
  90
  91 #define _FP_FRAC_SRS_2(X, N, sz)                                        \
  92   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
  93           ? ({                                                          \
  94               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
  95                         | (__builtin_constant_p (N) && (N) == 1         \
  96                            ? X##_f0 & 1                                 \
  97                            : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
  98               X##_f1 >>= (N);                                           \
  99             })                                                          \
 100           : ({                                                          \
 101               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
 102                         | ((((N) == _FP_W_TYPE_SIZE                     \
 103                              ? 0                                        \
 104                              : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
 105                             | X##_f0) != 0));                           \
 106               X##_f1 = 0;                                               \
 107             }))
 108
 109 #define _FP_FRAC_ADDI_2(X, I)   \
 110   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
 111
 112 #define _FP_FRAC_ADD_2(R, X, Y) \
 113   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 114
 115 #define _FP_FRAC_SUB_2(R, X, Y) \
 116   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
 117
 118 #define _FP_FRAC_DEC_2(X, Y)    \
 119   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
 120
 121 #define _FP_FRAC_CLZ_2(R, X)                    \
 122   do                                            \
 123     {                                           \
 124       if (X##_f1)                               \
 125         __FP_CLZ (R, X##_f1);                   \
 126       else                                      \
 127         {                                       \
 128           __FP_CLZ (R, X##_f0);                 \
 129           R += _FP_W_TYPE_SIZE;                 \
 130         }                                       \
 131     }                                           \
 132   while (0)
 133
 134 /* Predicates */
 135 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
 136 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
 138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
 139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
 140   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
 141 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 142 #define _FP_FRAC_GT_2(X, Y)     \
 143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
 144 #define _FP_FRAC_GE_2(X, Y)     \
 145   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
 146
 147 #define _FP_ZEROFRAC_2          0, 0
 148 #define _FP_MINFRAC_2           0, 1
 149 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
 150
 151 /*
 152  * Internals
 153  */
 154
 155 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
 156
 157 #define __FP_CLZ_2(R, xh, xl)                   \
 158   do                                            \
 159     {                                           \
 160       if (xh)                                   \
 161         __FP_CLZ (R, xh);                       \
 162       else                                      \
 163         {                                       \
 164           __FP_CLZ (R, xl);                     \
 165           R += _FP_W_TYPE_SIZE;                 \
 166         }                                       \
 167     }                                           \
 168   while (0)
 169
 170 #if 0
 171
 172 # ifndef __FP_FRAC_ADDI_2
 173 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
 174   (xh += ((xl += i) < i))
 175 # endif
 176 # ifndef __FP_FRAC_ADD_2
 177 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)       \
 178   (rh = xh + yh + ((rl = xl + yl) < xl))
 179 # endif
 180 # ifndef __FP_FRAC_SUB_2
 181 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)       \
 182   (rh = xh - yh - ((rl = xl - yl) > xl))
 183 # endif
 184 # ifndef __FP_FRAC_DEC_2
 185 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)       \
 186   do                                            \
 187     {                                           \
 188       UWtype _t = xl;                           \
 189       xh -= yh + ((xl -= yl) > _t);             \
 190     }                                           \
 191   while (0)
 192 # endif
 193
 194 #else
 195
 196 # undef __FP_FRAC_ADDI_2
 197 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
 198 # undef __FP_FRAC_ADD_2
 199 # define __FP_FRAC_ADD_2                add_ssaaaa
 200 # undef __FP_FRAC_SUB_2
 201 # define __FP_FRAC_SUB_2                sub_ddmmss
 202 # undef __FP_FRAC_DEC_2
 203 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)        \
 204   sub_ddmmss (xh, xl, xh, xl, yh, yl)
 205
 206 #endif
 207
 208 /*
 209  * Unpack the raw bits of a native fp value.  Do not classify or
 210  * normalize the data.
 211  */
 212
 213 #define _FP_UNPACK_RAW_2(fs, X, val)            \
 214   do                                            \
 215     {                                           \
 216       union _FP_UNION_##fs _flo;                \
 217       _flo.flt = (val);                         \
 218                                                 \
 219       X##_f0 = _flo.bits.frac0;                 \
 220       X##_f1 = _flo.bits.frac1;                 \
 221       X##_e  = _flo.bits.exp;                   \
 222       X##_s  = _flo.bits.sign;                  \
 223     }                                           \
 224   while (0)
 225
 226 #define _FP_UNPACK_RAW_2_P(fs, X, val)                                  \
 227   do                                                                    \
 228     {                                                                   \
 229       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);      \
 230                                                                         \
 231       X##_f0 = _flo->bits.frac0;                                        \
 232       X##_f1 = _flo->bits.frac1;                                        \
 233       X##_e  = _flo->bits.exp;                                          \
 234       X##_s  = _flo->bits.sign;                                         \
 235     }                                                                   \
 236   while (0)
 237
 238
 239 /*
 240  * Repack the raw bits of a native fp value.
 241  */
 242
 243 #define _FP_PACK_RAW_2(fs, val, X)              \
 244   do                                            \
 245     {                                           \
 246       union _FP_UNION_##fs _flo;                \
 247                                                 \
 248       _flo.bits.frac0 = X##_f0;                 \
 249       _flo.bits.frac1 = X##_f1;                 \
 250       _flo.bits.exp   = X##_e;                  \
 251       _flo.bits.sign  = X##_s;                  \
 252                                                 \
 253       (val) = _flo.flt;                         \
 254     }                                           \
 255   while (0)
 256
 257 #define _FP_PACK_RAW_2_P(fs, val, X)                                    \
 258   do                                                                    \
 259     {                                                                   \
 260       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);      \
 261                                                                         \
 262       _flo->bits.frac0 = X##_f0;                                        \
 263       _flo->bits.frac1 = X##_f1;                                        \
 264       _flo->bits.exp   = X##_e;                                         \
 265       _flo->bits.sign  = X##_s;                                         \
 266     }                                                                   \
 267   while (0)
 268
 269
 270 /*
 271  * Multiplication algorithms:
 272  */
 273
 274 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 275
 276 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
 277   do                                                                    \
 278     {                                                                   \
 279       _FP_FRAC_DECL_2 (_b);                                             \
 280       _FP_FRAC_DECL_2 (_c);                                             \
 281                                                                         \
 282       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
 283       doit (_b_f1, _b_f0, X##_f0, Y##_f1);                              \
 284       doit (_c_f1, _c_f0, X##_f1, Y##_f0);                              \
 285       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \
 286                                                                         \
 287       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 288                        _FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0,         \
 289                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 290                        _FP_FRAC_WORD_4 (R, 1));                         \
 291       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 292                        _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0,         \
 293                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 294                        _FP_FRAC_WORD_4 (R, 1));                         \
 295     }                                                                   \
 296   while (0)
 297
 298 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 299   do                                                                    \
 300     {                                                                   \
 301       _FP_FRAC_DECL_4 (_z);                                             \
 302                                                                         \
 303       _FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit);               \
 304                                                                         \
 305       /* Normalize since we know where the msb of the multiplicands     \
 306          were (bit B), we know that the msb of the of the product is    \
 307          at either 2B or 2B-1.  */                                      \
 308       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);                    \
 309       R##_f0 = _FP_FRAC_WORD_4 (_z, 0);                                 \
 310       R##_f1 = _FP_FRAC_WORD_4 (_z, 1);                                 \
 311     }                                                                   \
 312   while (0)
 313
 314 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 315    Do only 3 multiplications instead of four. This one is for machines
 316    where multiplication is much more expensive than subtraction.  */
 317
 318 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
 319   do                                                                    \
 320     {                                                                   \
 321       _FP_FRAC_DECL_2 (_b);                                             \
 322       _FP_FRAC_DECL_2 (_c);                                             \
 323       _FP_W_TYPE _d;                                                    \
 324       int _c1, _c2;                                                     \
 325                                                                         \
 326       _b_f0 = X##_f0 + X##_f1;                                          \
 327       _c1 = _b_f0 < X##_f0;                                             \
 328       _b_f1 = Y##_f0 + Y##_f1;                                          \
 329       _c2 = _b_f1 < Y##_f0;                                             \
 330       doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0);                \
 331       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \
 332       doit (_c_f1, _c_f0, X##_f1, Y##_f1);                              \
 333                                                                         \
 334       _b_f0 &= -_c2;                                                    \
 335       _b_f1 &= -_c1;                                                    \
 336       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 337                        _FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d,      \
 338                        0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
 339       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 340                         _b_f0);                                         \
 341       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
 342                         _b_f1);                                         \
 343       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 344                        _FP_FRAC_WORD_4 (R, 1),                          \
 345                        0, _d, _FP_FRAC_WORD_4 (R, 0));                  \
 346       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 347                        _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0);        \
 348       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
 349                        _c_f1, _c_f0,                                    \
 350                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
 351     }                                                                   \
 352   while (0)
 353
 354 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 355   do                                                                    \
 356     {                                                                   \
 357       _FP_FRAC_DECL_4 (_z);                                             \
 358                                                                         \
 359       _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit);          \
 360                                                                         \
 361       /* Normalize since we know where the msb of the multiplicands     \
 362          were (bit B), we know that the msb of the of the product is    \
 363          at either 2B or 2B-1.  */                                      \
 364       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);                    \
 365       R##_f0 = _FP_FRAC_WORD_4 (_z, 0);                                 \
 366       R##_f1 = _FP_FRAC_WORD_4 (_z, 1);                                 \
 367     }                                                                   \
 368   while (0)
 369
 370 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)       \
 371   do                                                    \
 372     {                                                   \
 373       _FP_W_TYPE _x[2], _y[2];                          \
 374       _x[0] = X##_f0;                                   \
 375       _x[1] = X##_f1;                                   \
 376       _y[0] = Y##_f0;                                   \
 377       _y[1] = Y##_f1;                                   \
 378                                                         \
 379       mpn_mul_n (R##_f, _x, _y, 2);                     \
 380     }                                                   \
 381   while (0)
 382
 383 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 384   do                                                                    \
 385     {                                                                   \
 386       _FP_FRAC_DECL_4 (_z);                                             \
 387                                                                         \
 388       _FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y);                      \
 389                                                                         \
 390       /* Normalize since we know where the msb of the multiplicands     \
 391          were (bit B), we know that the msb of the of the product is    \
 392          at either 2B or 2B-1.  */                                      \
 393       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);                    \
 394       R##_f0 = _z_f[0];                                                 \
 395       R##_f1 = _z_f[1];                                                 \
 396     }                                                                   \
 397   while (0)
 398
 399 /* Do at most 120x120=240 bits multiplication using double floating
 400    point multiplication.  This is useful if floating point
 401    multiplication has much bigger throughput than integer multiply.
 402    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 403    between 106 and 120 only.
 404    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 405    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 406    towards zero,  RESETFE should optionally reset it back.  */
 407
 408 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
 409   do                                                                    \
 410     {                                                                   \
 411       static const double _const[] =                                    \
 412         {                                                               \
 413           /* 2^-24 */ 5.9604644775390625e-08,                           \
 414           /* 2^-48 */ 3.5527136788005009e-15,                           \
 415           /* 2^-72 */ 2.1175823681357508e-22,                           \
 416           /* 2^-96 */ 1.2621774483536189e-29,                           \
 417           /* 2^28 */ 2.68435456e+08,                                    \
 418           /* 2^4 */ 1.600000e+01,                                       \
 419           /* 2^-20 */ 9.5367431640625e-07,                              \
 420           /* 2^-44 */ 5.6843418860808015e-14,                           \
 421           /* 2^-68 */ 3.3881317890172014e-21,                           \
 422           /* 2^-92 */ 2.0194839173657902e-28,                           \
 423           /* 2^-116 */ 1.2037062152420224e-35                           \
 424         };                                                              \
 425       double _a240, _b240, _c240, _d240, _e240, _f240,                  \
 426         _g240, _h240, _i240, _j240, _k240;                              \
 427       union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
 428                                        _p240, _q240, _r240, _s240;      \
 429       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
 430                                                                         \
 431       if (wfracbits < 106 || wfracbits > 120)                           \
 432         abort ();                                                       \
 433                                                                         \
 434       setfetz;                                                          \
 435                                                                         \
 436       _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
 437       _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
 438       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
 439       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
 440       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
 441       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
 442       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
 443       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
 444       _a240 = (double) (long) (X##_f1 >> 32);                           \
 445       _f240 = (double) (long) (Y##_f1 >> 32);                           \
 446       _e240 *= _const[3];                                               \
 447       _j240 *= _const[3];                                               \
 448       _d240 *= _const[2];                                               \
 449       _i240 *= _const[2];                                               \
 450       _c240 *= _const[1];                                               \
 451       _h240 *= _const[1];                                               \
 452       _b240 *= _const[0];                                               \
 453       _g240 *= _const[0];                                               \
 454       _s240.d =                                                       _e240*_j240; \
 455       _r240.d =                                         _d240*_j240 + _e240*_i240; \
 456       _q240.d =                           _c240*_j240 + _d240*_i240 + _e240*_h240; \
 457       _p240.d =             _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
 458       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
 459       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
 460       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
 461       _l240.d = _a240*_g240 + _b240*_f240;                              \
 462       _k240 =   _a240*_f240;                                            \
 463       _r240.d += _s240.d;                                               \
 464       _q240.d += _r240.d;                                               \
 465       _p240.d += _q240.d;                                               \
 466       _o240.d += _p240.d;                                               \
 467       _n240.d += _o240.d;                                               \
 468       _m240.d += _n240.d;                                               \
 469       _l240.d += _m240.d;                                               \
 470       _k240 += _l240.d;                                                 \
 471       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
 472       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
 473       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
 474       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
 475       _o240.d += _const[7];                                             \
 476       _n240.d += _const[6];                                             \
 477       _m240.d += _const[5];                                             \
 478       _l240.d += _const[4];                                             \
 479       if (_s240.d != 0.0)                                               \
 480         _y240 = 1;                                                      \
 481       if (_r240.d != 0.0)                                               \
 482         _y240 = 1;                                                      \
 483       if (_q240.d != 0.0)                                               \
 484         _y240 = 1;                                                      \
 485       if (_p240.d != 0.0)                                               \
 486         _y240 = 1;                                                      \
 487       _t240 = (DItype) _k240;                                           \
 488       _u240 = _l240.i;                                                  \
 489       _v240 = _m240.i;                                                  \
 490       _w240 = _n240.i;                                                  \
 491       _x240 = _o240.i;                                                  \
 492       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                      \
 493                 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));     \
 494       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))         \
 495                 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))       \
 496                 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))       \
 497                 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))        \
 498                 | _y240);                                               \
 499       resetfe;                                                          \
 500     }                                                                   \
 501   while (0)
 502
 503 /*
 504  * Division algorithms:
 505  */
 506
 507 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 508   do                                                                    \
 509     {                                                                   \
 510       _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;       \
 511       if (_FP_FRAC_GE_2 (X, Y))                                         \
 512         {                                                               \
 513           _n_f2 = X##_f1 >> 1;                                          \
 514           _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;        \
 515           _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                      \
 516         }                                                               \
 517       else                                                              \
 518         {                                                               \
 519           R##_e--;                                                      \
 520           _n_f2 = X##_f1;                                               \
 521           _n_f1 = X##_f0;                                               \
 522           _n_f0 = 0;                                                    \
 523         }                                                               \
 524                                                                         \
 525       /* Normalize, i.e. make the most significant bit of the           \
 526          denominator set. */                                            \
 527       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                          \
 528                                                                         \
 529       udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                 \
 530       umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0);                         \
 531       _r_f0 = _n_f0;                                                    \
 532       if (_FP_FRAC_GT_2 (_m, _r))                                       \
 533         {                                                               \
 534           R##_f1--;                                                     \
 535           _FP_FRAC_ADD_2 (_r, Y, _r);                                   \
 536           if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r))          \
 537             {                                                           \
 538               R##_f1--;                                                 \
 539               _FP_FRAC_ADD_2 (_r, Y, _r);                               \
 540             }                                                           \
 541         }                                                               \
 542       _FP_FRAC_DEC_2 (_r, _m);                                          \
 543                                                                         \
 544       if (_r_f1 == Y##_f1)                                              \
 545         {                                                               \
 546           /* This is a special case, not an optimization                \
 547              (_r/Y##_f1 would not fit into UWtype).                     \
 548              As _r is guaranteed to be < Y,  R##_f0 can be either       \
 549              (UWtype)-1 or (UWtype)-2.  But as we know what kind        \
 550              of bits it is (sticky, guard, round),  we don't care.      \
 551              We also don't care what the reminder is,  because the      \
 552              guard bit will be set anyway.  -jj */                      \
 553           R##_f0 = -1;                                                  \
 554         }                                                               \
 555       else                                                              \
 556         {                                                               \
 557           udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);             \
 558           umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0);                     \
 559           _r_f0 = 0;                                                    \
 560           if (_FP_FRAC_GT_2 (_m, _r))                                   \
 561             {                                                           \
 562               R##_f0--;                                                 \
 563               _FP_FRAC_ADD_2 (_r, Y, _r);                               \
 564               if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r))      \
 565                 {                                                       \
 566                   R##_f0--;                                             \
 567                   _FP_FRAC_ADD_2 (_r, Y, _r);                           \
 568                 }                                                       \
 569             }                                                           \
 570           if (!_FP_FRAC_EQ_2 (_r, _m))                                  \
 571             R##_f0 |= _FP_WORK_STICKY;                                  \
 572         }                                                               \
 573     }                                                                   \
 574   while (0)
 575
 576
 577 /*
 578  * Square root algorithms:
 579  * We have just one right now, maybe Newton approximation
 580  * should be added for those machines where division is fast.
 581  */
 582
 583 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                          \
 584   do                                                            \
 585     {                                                           \
 586       while (q)                                                 \
 587         {                                                       \
 588           T##_f1 = S##_f1 + q;                                  \
 589           if (T##_f1 <= X##_f1)                                 \
 590             {                                                   \
 591               S##_f1 = T##_f1 + q;                              \
 592               X##_f1 -= T##_f1;                                 \
 593               R##_f1 += q;                                      \
 594             }                                                   \
 595           _FP_FRAC_SLL_2 (X, 1);                                \
 596           q >>= 1;                                              \
 597         }                                                       \
 598       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);              \
 599       while (q != _FP_WORK_ROUND)                               \
 600         {                                                       \
 601           T##_f0 = S##_f0 + q;                                  \
 602           T##_f1 = S##_f1;                                      \
 603           if (T##_f1 < X##_f1                                   \
 604               || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))        \
 605             {                                                   \
 606               S##_f0 = T##_f0 + q;                              \
 607               S##_f1 += (T##_f0 > S##_f0);                      \
 608               _FP_FRAC_DEC_2 (X, T);                            \
 609               R##_f0 += q;                                      \
 610             }                                                   \
 611           _FP_FRAC_SLL_2 (X, 1);                                \
 612           q >>= 1;                                              \
 613         }                                                       \
 614       if (X##_f0 | X##_f1)                                      \
 615         {                                                       \
 616           if (S##_f1 < X##_f1                                   \
 617               || (S##_f1 == X##_f1 && S##_f0 < X##_f0))         \
 618             R##_f0 |= _FP_WORK_ROUND;                           \
 619           R##_f0 |= _FP_WORK_STICKY;                            \
 620         }                                                       \
 621     }                                                           \
 622   while (0)
 623
 624
 625 /*
 626  * Assembly/disassembly for converting to/from integral types.
 627  * No shifting or overflow handled here.
 628  */
 629
 630 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 631   (void) ((rsize <= _FP_W_TYPE_SIZE)            \
 632           ? ({ r = X##_f0; })                   \
 633           : ({                                  \
 634               r = X##_f1;                       \
 635               r <<= _FP_W_TYPE_SIZE;            \
 636               r += X##_f0;                      \
 637             }))
 638
 639 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
 640   do                                                                    \
 641     {                                                                   \
 642       X##_f0 = r;                                                       \
 643       X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
 644     }                                                                   \
 645   while (0)
 646
 647 /*
 648  * Convert FP values between word sizes
 649  */
 650
 651 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
 652
 653 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
 654
 655 #define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)