soft-fp/op-2.h

   1 /* Software floating-point emulation.
   2    Basic two-word fraction declaration and manipulation.
   3    Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Library General Public License as
  12    published by the Free Software Foundation; either version 2 of the
  13    License, or (at your option) any later version.
  14
  15    The GNU C Library is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    Library General Public License for more details.
  19
  20    You should have received a copy of the GNU Library General Public
  21    License along with the GNU C Library; see the file COPYING.LIB.  If
  22    not, write to the Free Software Foundation, Inc.,
  23    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  24
  25 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
  26 #define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
  27 #define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
  28 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
  29 #define _FP_FRAC_LOW_2(X)       (X##_f0)
  30 #define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
  31
  32 #define _FP_FRAC_SLL_2(X,N)                                             \
  33   do {                                                                  \
  34     if ((N) < _FP_W_TYPE_SIZE)                                          \
  35       {                                                                 \
  36         if (__builtin_constant_p(N) && (N) == 1)                        \
  37           {                                                             \
  38             X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
  39             X##_f0 += X##_f0;                                           \
  40           }                                                             \
  41         else                                                            \
  42           {                                                             \
  43             X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
  44             X##_f0 <<= (N);                                             \
  45           }                                                             \
  46       }                                                                 \
  47     else                                                                \
  48       {                                                                 \
  49         X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
  50         X##_f0 = 0;                                                     \
  51       }                                                                 \
  52   } while (0)
  53
  54 #define _FP_FRAC_SRL_2(X,N)                                             \
  55   do {                                                                  \
  56     if ((N) < _FP_W_TYPE_SIZE)                                          \
  57       {                                                                 \
  58         X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));     \
  59         X##_f1 >>= (N);                                                 \
  60       }                                                                 \
  61     else                                                                \
  62       {                                                                 \
  63         X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                     \
  64         X##_f1 = 0;                                                     \
  65       }                                                                 \
  66   } while (0)
  67
  68 /* Right shift with sticky-lsb.  */
  69 #define _FP_FRAC_SRS_2(X,N,sz)                                          \
  70   do {                                                                  \
  71     if ((N) < _FP_W_TYPE_SIZE)                                          \
  72       {                                                                 \
  73         X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |   \
  74                   (__builtin_constant_p(N) && (N) == 1                  \
  75                    ? X##_f0 & 1                                         \
  76                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));        \
  77         X##_f1 >>= (N);                                                 \
  78       }                                                                 \
  79     else                                                                \
  80       {                                                                 \
  81         X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                   \
  82                   (((X##_f1 << (sz - (N))) | X##_f0) != 0));            \
  83         X##_f1 = 0;                                                     \
  84       }                                                                 \
  85   } while (0)
  86
  87 #define _FP_FRAC_ADDI_2(X,I)    \
  88   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
  89
  90 #define _FP_FRAC_ADD_2(R,X,Y)   \
  91   __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
  92
  93 #define _FP_FRAC_SUB_2(R,X,Y)   \
  94   __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
  95
  96 #define _FP_FRAC_DEC_2(X,Y)     \
  97   __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
  98
  99 #define _FP_FRAC_CLZ_2(R,X)     \
 100   do {                          \
 101     if (X##_f1)                 \
 102       __FP_CLZ(R,X##_f1);       \
 103     else                        \
 104     {                           \
 105       __FP_CLZ(R,X##_f0);       \
 106       R += _FP_W_TYPE_SIZE;     \
 107     }                           \
 108   } while(0)
 109
 110 /* Predicates */
 111 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
 112 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
 113 #define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
 114 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
 115 #define _FP_FRAC_GT_2(X, Y)     \
 116   (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 > Y##_f0)
 117 #define _FP_FRAC_GE_2(X, Y)     \
 118   (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)
 119
 120 #define _FP_ZEROFRAC_2          0, 0
 121 #define _FP_MINFRAC_2           0, 1
 122 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
 123
 124 /*
 125  * Internals
 126  */
 127
 128 #define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
 129
 130 #define __FP_CLZ_2(R, xh, xl)   \
 131   do {                          \
 132     if (xh)                     \
 133       __FP_CLZ(R,xh);           \
 134     else                        \
 135     {                           \
 136       __FP_CLZ(R,xl);           \
 137       R += _FP_W_TYPE_SIZE;     \
 138     }                           \
 139   } while(0)
 140
 141 #if 0
 142
 143 #ifndef __FP_FRAC_ADDI_2
 144 #define __FP_FRAC_ADDI_2(xh, xl, i)     \
 145   (xh += ((xl += i) < i))
 146 #endif
 147 #ifndef __FP_FRAC_ADD_2
 148 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
 149   (rh = xh + yh + ((rl = xl + yl) < xl))
 150 #endif
 151 #ifndef __FP_FRAC_SUB_2
 152 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
 153   (rh = xh - yh - ((rl = xl - yl) > xl))
 154 #endif
 155 #ifndef __FP_FRAC_DEC_2
 156 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
 157   do {                                  \
 158     UWtype _t = xl;                     \
 159     xh -= yh + ((xl -= yl) > _t);       \
 160   } while (0)
 161 #endif
 162
 163 #else
 164
 165 #undef __FP_FRAC_ADDI_2
 166 #define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
 167 #undef __FP_FRAC_ADD_2
 168 #define __FP_FRAC_ADD_2                 add_ssaaaa
 169 #undef __FP_FRAC_SUB_2
 170 #define __FP_FRAC_SUB_2                 sub_ddmmss
 171 #undef __FP_FRAC_DEC_2
 172 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
 173
 174 #endif
 175
 176 /*
 177  * Unpack the raw bits of a native fp value.  Do not classify or
 178  * normalize the data.
 179  */
 180
 181 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
 182   do {                                                  \
 183     union _FP_UNION_##fs _flo; _flo.flt = (val);        \
 184                                                         \
 185     X##_f0 = _flo.bits.frac0;                           \
 186     X##_f1 = _flo.bits.frac1;                           \
 187     X##_e  = _flo.bits.exp;                             \
 188     X##_s  = _flo.bits.sign;                            \
 189   } while (0)
 190
 191 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
 192   do {                                                  \
 193     union _FP_UNION_##fs *_flo =                        \
 194       (union _FP_UNION_##fs *)(val);                    \
 195                                                         \
 196     X##_f0 = _flo->bits.frac0;                          \
 197     X##_f1 = _flo->bits.frac1;                          \
 198     X##_e  = _flo->bits.exp;                            \
 199     X##_s  = _flo->bits.sign;                           \
 200   } while (0)
 201
 202
 203 /*
 204  * Repack the raw bits of a native fp value.
 205  */
 206
 207 #define _FP_PACK_RAW_2(fs, val, X)                      \
 208   do {                                                  \
 209     union _FP_UNION_##fs _flo;                          \
 210                                                         \
 211     _flo.bits.frac0 = X##_f0;                           \
 212     _flo.bits.frac1 = X##_f1;                           \
 213     _flo.bits.exp   = X##_e;                            \
 214     _flo.bits.sign  = X##_s;                            \
 215                                                         \
 216     (val) = _flo.flt;                                   \
 217   } while (0)
 218
 219 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
 220   do {                                                  \
 221     union _FP_UNION_##fs *_flo =                        \
 222       (union _FP_UNION_##fs *)(val);                    \
 223                                                         \
 224     _flo->bits.frac0 = X##_f0;                          \
 225     _flo->bits.frac1 = X##_f1;                          \
 226     _flo->bits.exp   = X##_e;                           \
 227     _flo->bits.sign  = X##_s;                           \
 228   } while (0)
 229
 230
 231 /*
 232  * Multiplication algorithms:
 233  */
 234
 235 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 236
 237 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
 238   do {                                                                  \
 239     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
 240                                                                         \
 241     doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
 242     doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
 243     doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
 244     doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
 245                                                                         \
 246     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 247                     _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,             \
 248                     _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 249                     _FP_FRAC_WORD_4(_z,1));                             \
 250     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 251                     _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,             \
 252                     _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 253                     _FP_FRAC_WORD_4(_z,1));                             \
 254                                                                         \
 255     /* Normalize since we know where the msb of the multiplicands       \
 256        were (bit B), we know that the msb of the of the product is      \
 257        at either 2B or 2B-1.  */                                        \
 258     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 259     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
 260     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
 261   } while (0)
 262
 263 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
 264    Do only 3 multiplications instead of four. This one is for machines
 265    where multiplication is much more expensive than subtraction.  */
 266
 267 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
 268   do {                                                                  \
 269     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
 270     _FP_W_TYPE _d;                                                      \
 271     int _c1, _c2;                                                       \
 272                                                                         \
 273     _b_f0 = X##_f0 + X##_f1;                                            \
 274     _c1 = _b_f0 < X##_f0;                                               \
 275     _b_f1 = Y##_f0 + Y##_f1;                                            \
 276     _c2 = _b_f1 < Y##_f0;                                               \
 277     doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);                    \
 278     doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);   \
 279     doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                 \
 280                                                                         \
 281     _b_f0 &= -_c2;                                                      \
 282     _b_f1 &= -_c1;                                                      \
 283     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 284                     _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,          \
 285                     0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));   \
 286     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
 287                      _b_f0);                                            \
 288     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),       \
 289                      _b_f1);                                            \
 290     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 291                     _FP_FRAC_WORD_4(_z,1),                              \
 292                     0, _d, _FP_FRAC_WORD_4(_z,0));                      \
 293     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
 294                     _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);            \
 295     __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),       \
 296                     _c_f1, _c_f0,                                       \
 297                     _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));      \
 298                                                                         \
 299     /* Normalize since we know where the msb of the multiplicands       \
 300        were (bit B), we know that the msb of the of the product is      \
 301        at either 2B or 2B-1.  */                                        \
 302     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 303     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
 304     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
 305   } while (0)
 306
 307 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
 308   do {                                                                  \
 309     _FP_FRAC_DECL_4(_z);                                                \
 310     _FP_W_TYPE _x[2], _y[2];                                            \
 311     _x[0] = X##_f0; _x[1] = X##_f1;                                     \
 312     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
 313                                                                         \
 314     mpn_mul_n(_z_f, _x, _y, 2);                                         \
 315                                                                         \
 316     /* Normalize since we know where the msb of the multiplicands       \
 317        were (bit B), we know that the msb of the of the product is      \
 318        at either 2B or 2B-1.  */                                        \
 319     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
 320     R##_f0 = _z_f[0];                                                   \
 321     R##_f1 = _z_f[1];                                                   \
 322   } while (0)
 323
 324 /* Do at most 120x120=240 bits multiplication using double floating
 325    point multiplication.  This is useful if floating point
 326    multiplication has much bigger throughput than integer multiply.
 327    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
 328    between 106 and 120 only.
 329    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
 330    SETFETZ is a macro which will disable all FPU exceptions and set rounding
 331    towards zero,  RESETFE should optionally reset it back.  */
 332
 333 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)     \
 334   do {                                                                          \
 335     static const double _const[] = {                                            \
 336       /* 2^-24 */ 5.9604644775390625e-08,                                       \
 337       /* 2^-48 */ 3.5527136788005009e-15,                                       \
 338       /* 2^-72 */ 2.1175823681357508e-22,                                       \
 339       /* 2^-96 */ 1.2621774483536189e-29,                                       \
 340       /* 2^28 */ 2.68435456e+08,                                                \
 341       /* 2^4 */ 1.600000e+01,                                                   \
 342       /* 2^-20 */ 9.5367431640625e-07,                                          \
 343       /* 2^-44 */ 5.6843418860808015e-14,                                       \
 344       /* 2^-68 */ 3.3881317890172014e-21,                                       \
 345       /* 2^-92 */ 2.0194839173657902e-28,                                       \
 346       /* 2^-116 */ 1.2037062152420224e-35};                                     \
 347     double _a240, _b240, _c240, _d240, _e240, _f240,                            \
 348            _g240, _h240, _i240, _j240, _k240;                                   \
 349     union { double d; UDItype i; } _l240, _m240, _n240, _o240,                  \
 350                                    _p240, _q240, _r240, _s240;                  \
 351     UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;                       \
 352                                                                                 \
 353     if (wfracbits < 106 || wfracbits > 120)                                     \
 354       abort();                                                                  \
 355                                                                                 \
 356     setfetz;                                                                    \
 357                                                                                 \
 358     _e240 = (double)(long)(X##_f0 & 0xffffff);                                  \
 359     _j240 = (double)(long)(Y##_f0 & 0xffffff);                                  \
 360     _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);                          \
 361     _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);                          \
 362     _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));       \
 363     _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));       \
 364     _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);                           \
 365     _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);                           \
 366     _a240 = (double)(long)(X##_f1 >> 32);                                       \
 367     _f240 = (double)(long)(Y##_f1 >> 32);                                       \
 368     _e240 *= _const[3];                                                         \
 369     _j240 *= _const[3];                                                         \
 370     _d240 *= _const[2];                                                         \
 371     _i240 *= _const[2];                                                         \
 372     _c240 *= _const[1];                                                         \
 373     _h240 *= _const[1];                                                         \
 374     _b240 *= _const[0];                                                         \
 375     _g240 *= _const[0];                                                         \
 376     _s240.d =                                                         _e240*_j240;\
 377     _r240.d =                                           _d240*_j240 + _e240*_i240;\
 378     _q240.d =                             _c240*_j240 + _d240*_i240 + _e240*_h240;\
 379     _p240.d =               _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
 380     _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
 381     _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;            \
 382     _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                          \
 383     _l240.d = _a240*_g240 + _b240*_f240;                                        \
 384     _k240 =   _a240*_f240;                                                      \
 385     _r240.d += _s240.d;                                                         \
 386     _q240.d += _r240.d;                                                         \
 387     _p240.d += _q240.d;                                                         \
 388     _o240.d += _p240.d;                                                         \
 389     _n240.d += _o240.d;                                                         \
 390     _m240.d += _n240.d;                                                         \
 391     _l240.d += _m240.d;                                                         \
 392     _k240 += _l240.d;                                                           \
 393     _s240.d -= ((_const[10]+_s240.d)-_const[10]);                               \
 394     _r240.d -= ((_const[9]+_r240.d)-_const[9]);                                 \
 395     _q240.d -= ((_const[8]+_q240.d)-_const[8]);                                 \
 396     _p240.d -= ((_const[7]+_p240.d)-_const[7]);                                 \
 397     _o240.d += _const[7];                                                       \
 398     _n240.d += _const[6];                                                       \
 399     _m240.d += _const[5];                                                       \
 400     _l240.d += _const[4];                                                       \
 401     if (_s240.d != 0.0) _y240 = 1;                                              \
 402     if (_r240.d != 0.0) _y240 = 1;                                              \
 403     if (_q240.d != 0.0) _y240 = 1;                                              \
 404     if (_p240.d != 0.0) _y240 = 1;                                              \
 405     _t240 = (DItype)_k240;                                                      \
 406     _u240 = _l240.i;                                                            \
 407     _v240 = _m240.i;                                                            \
 408     _w240 = _n240.i;                                                            \
 409     _x240 = _o240.i;                                                            \
 410     R##_f1 = (_t240 << (128 - (wfracbits - 1)))                                 \
 411              | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));                 \
 412     R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                    \
 413              | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))                  \
 414              | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))                  \
 415              | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))                   \
 416              | _y240;                                                           \
 417     resetfe;                                                                    \
 418   } while (0)
 419
 420 /*
 421  * Division algorithms:
 422  */
 423
 424 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
 425   do {                                                                  \
 426     _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;         \
 427     if (_FP_FRAC_GT_2(X, Y))                                            \
 428       {                                                                 \
 429         _n_f2 = X##_f1 >> 1;                                            \
 430         _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
 431         _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
 432       }                                                                 \
 433     else                                                                \
 434       {                                                                 \
 435         R##_e--;                                                        \
 436         _n_f2 = X##_f1;                                                 \
 437         _n_f1 = X##_f0;                                                 \
 438         _n_f0 = 0;                                                      \
 439       }                                                                 \
 440                                                                         \
 441     /* Normalize, i.e. make the most significant bit of the             \
 442        denominator set. */                                              \
 443     _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);                             \
 444                                                                         \
 445     udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                    \
 446     umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                            \
 447     _r_f0 = _n_f0;                                                      \
 448     if (_FP_FRAC_GT_2(_m, _r))                                          \
 449       {                                                                 \
 450         R##_f1--;                                                       \
 451         _FP_FRAC_ADD_2(_r, Y, _r);                                      \
 452         if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
 453           {                                                             \
 454             R##_f1--;                                                   \
 455             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
 456           }                                                             \
 457       }                                                                 \
 458     _FP_FRAC_DEC_2(_r, _m);                                             \
 459                                                                         \
 460     if (_r_f1 == Y##_f1)                                                \
 461       {                                                                 \
 462         /* This is a special case, not an optimization                  \
 463            (_r/Y##_f1 would not fit into UWtype).                       \
 464            As _r is guaranteed to be < Y,  R##_f0 can be either         \
 465            (UWtype)-1 or (UWtype)-2.  But as we know what kind          \
 466            of bits it is (sticky, guard, round),  we don't care.        \
 467            We also don't care what the reminder is,  because the        \
 468            guard bit will be set anyway.  -jj */                        \
 469         R##_f0 = -1;                                                    \
 470       }                                                                 \
 471     else                                                                \
 472       {                                                                 \
 473         udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);                \
 474         umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);                        \
 475         _r_f0 = 0;                                                      \
 476         if (_FP_FRAC_GT_2(_m, _r))                                      \
 477           {                                                             \
 478             R##_f0--;                                                   \
 479             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
 480             if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))          \
 481               {                                                         \
 482                 R##_f0--;                                               \
 483                 _FP_FRAC_ADD_2(_r, Y, _r);                              \
 484               }                                                         \
 485           }                                                             \
 486         if (!_FP_FRAC_EQ_2(_r, _m))                                     \
 487           R##_f0 |= _FP_WORK_STICKY;                                    \
 488       }                                                                 \
 489   } while (0)
 490
 491
 492 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
 493   do {                                                                  \
 494     _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
 495     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
 496     _x[0] = _x[3] = 0;                                                  \
 497     if (_FP_FRAC_GT_2(X, Y))                                            \
 498       {                                                                 \
 499         R##_e++;                                                        \
 500         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
 501                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
 502                             (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
 503         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
 504       }                                                                 \
 505     else                                                                \
 506       {                                                                 \
 507         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
 508                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
 509                             (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
 510         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
 511       }                                                                 \
 512                                                                         \
 513     (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                            \
 514     R##_f1 = _z[1];                                                     \
 515     R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
 516   } while (0)
 517
 518
 519 /*
 520  * Square root algorithms:
 521  * We have just one right now, maybe Newton approximation
 522  * should be added for those machines where division is fast.
 523  */
 524
 525 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
 526   do {                                                  \
 527     while (q)                                           \
 528       {                                                 \
 529         T##_f1 = S##_f1 + q;                            \
 530         if (T##_f1 <= X##_f1)                           \
 531           {                                             \
 532             S##_f1 = T##_f1 + q;                        \
 533             X##_f1 -= T##_f1;                           \
 534             R##_f1 += q;                                \
 535           }                                             \
 536         _FP_FRAC_SLL_2(X, 1);                           \
 537         q >>= 1;                                        \
 538       }                                                 \
 539     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
 540     while (q != _FP_WORK_ROUND)                         \
 541       {                                                 \
 542         T##_f0 = S##_f0 + q;                            \
 543         T##_f1 = S##_f1;                                \
 544         if (T##_f1 < X##_f1 ||                          \
 545             (T##_f1 == X##_f1 && T##_f0 <= X##_f0))     \
 546           {                                             \
 547             S##_f0 = T##_f0 + q;                        \
 548             S##_f1 += (T##_f0 > S##_f0);                \
 549             _FP_FRAC_DEC_2(X, T);                       \
 550             R##_f0 += q;                                \
 551           }                                             \
 552         _FP_FRAC_SLL_2(X, 1);                           \
 553         q >>= 1;                                        \
 554       }                                                 \
 555     if (X##_f0 | X##_f1)                                \
 556       {                                                 \
 557         if (S##_f1 < X##_f1 ||                          \
 558             (S##_f1 == X##_f1 && S##_f0 < X##_f0))      \
 559           R##_f0 |= _FP_WORK_ROUND;                     \
 560         R##_f0 |= _FP_WORK_STICKY;                      \
 561       }                                                 \
 562   } while (0)
 563
 564
 565 /*
 566  * Assembly/disassembly for converting to/from integral types.
 567  * No shifting or overflow handled here.
 568  */
 569
 570 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
 571   do {                                          \
 572     if (rsize <= _FP_W_TYPE_SIZE)               \
 573       r = X##_f0;                               \
 574     else                                        \
 575       {                                         \
 576         r = X##_f1;                             \
 577         r <<= _FP_W_TYPE_SIZE;                  \
 578         r += X##_f0;                            \
 579       }                                         \
 580   } while (0)
 581
 582 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
 583   do {                                                                  \
 584     X##_f0 = r;                                                         \
 585     X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
 586   } while (0)
 587
 588 /*
 589  * Convert FP values between word sizes
 590  */
 591
 592 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)                               \
 593   do {                                                                  \
 594     if (S##_c != FP_CLS_NAN)                                            \
 595       _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),    \
 596                      _FP_WFRACBITS_##sfs);                              \
 597     else                                                                \
 598       _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));   \
 599     D##_f = S##_f0;                                                     \
 600   } while (0)
 601
 602 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)                               \
 603   do {                                                                  \
 604     D##_f0 = S##_f;                                                     \
 605     D##_f1 = 0;                                                         \
 606     _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
 607   } while (0)
 608