soft-fp/op-4.h

   1 /* Software floating-point emulation.
   2    Basic four-word fraction declaration and manipulation.
   3    Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Richard Henderson (rth@cygnus.com),
   6                   Jakub Jelinek (jj@ultra.linux.cz),
   7                   David S. Miller (davem@redhat.com) and
   8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
   9
  10    The GNU C Library is free software; you can redistribute it and/or
  11    modify it under the terms of the GNU Lesser General Public
  12    License as published by the Free Software Foundation; either
  13    version 2.1 of the License, or (at your option) any later version.
  14
  15    In addition to the permissions in the GNU Lesser General Public
  16    License, the Free Software Foundation gives you unlimited
  17    permission to link the compiled version of this file into
  18    combinations with other programs, and to distribute those
  19    combinations without any restriction coming from the use of this
  20    file.  (The Lesser General Public License restrictions do apply in
  21    other respects; for example, they cover modification of the file,
  22    and distribution when not linked into a combine executable.)
  23
  24    The GNU C Library is distributed in the hope that it will be useful,
  25    but WITHOUT ANY WARRANTY; without even the implied warranty of
  26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27    Lesser General Public License for more details.
  28
  29    You should have received a copy of the GNU Lesser General Public
  30    License along with the GNU C Library; if not, see
  31    <http://www.gnu.org/licenses/>.  */
  32
  33 #define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
  34 #define _FP_FRAC_COPY_4(D,S)                    \
  35   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
  36    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
  37 #define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
  38 #define _FP_FRAC_HIGH_4(X)      (X##_f[3])
  39 #define _FP_FRAC_LOW_4(X)       (X##_f[0])
  40 #define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
  41
  42 #define _FP_FRAC_SLL_4(X,N)                                             \
  43   do {                                                                  \
  44     _FP_I_TYPE _up, _down, _skip, _i;                                   \
  45     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
  46     _up = (N) % _FP_W_TYPE_SIZE;                                        \
  47     _down = _FP_W_TYPE_SIZE - _up;                                      \
  48     if (!_up)                                                           \
  49       for (_i = 3; _i >= _skip; --_i)                                   \
  50         X##_f[_i] = X##_f[_i-_skip];                                    \
  51     else                                                                \
  52       {                                                                 \
  53         for (_i = 3; _i > _skip; --_i)                                  \
  54           X##_f[_i] = X##_f[_i-_skip] << _up                            \
  55                       | X##_f[_i-_skip-1] >> _down;                     \
  56         X##_f[_i--] = X##_f[0] << _up;                                  \
  57       }                                                                 \
  58     for (; _i >= 0; --_i)                                               \
  59       X##_f[_i] = 0;                                                    \
  60   } while (0)
  61
  62 /* This one was broken too */
  63 #define _FP_FRAC_SRL_4(X,N)                                             \
  64   do {                                                                  \
  65     _FP_I_TYPE _up, _down, _skip, _i;                                   \
  66     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
  67     _down = (N) % _FP_W_TYPE_SIZE;                                      \
  68     _up = _FP_W_TYPE_SIZE - _down;                                      \
  69     if (!_down)                                                         \
  70       for (_i = 0; _i <= 3-_skip; ++_i)                                 \
  71         X##_f[_i] = X##_f[_i+_skip];                                    \
  72     else                                                                \
  73       {                                                                 \
  74         for (_i = 0; _i < 3-_skip; ++_i)                                \
  75           X##_f[_i] = X##_f[_i+_skip] >> _down                          \
  76                       | X##_f[_i+_skip+1] << _up;                       \
  77         X##_f[_i++] = X##_f[3] >> _down;                                \
  78       }                                                                 \
  79     for (; _i < 4; ++_i)                                                \
  80       X##_f[_i] = 0;                                                    \
  81   } while (0)
  82
  83
  84 /* Right shift with sticky-lsb.
  85  * What this actually means is that we do a standard right-shift,
  86  * but that if any of the bits that fall off the right hand side
  87  * were one then we always set the LSbit.
  88  */
  89 #define _FP_FRAC_SRST_4(X,S,N,size)                     \
  90   do {                                                  \
  91     _FP_I_TYPE _up, _down, _skip, _i;                   \
  92     _FP_W_TYPE _s;                                      \
  93     _skip = (N) / _FP_W_TYPE_SIZE;                      \
  94     _down = (N) % _FP_W_TYPE_SIZE;                      \
  95     _up = _FP_W_TYPE_SIZE - _down;                      \
  96     for (_s = _i = 0; _i < _skip; ++_i)                 \
  97       _s |= X##_f[_i];                                  \
  98     if (!_down)                                         \
  99       for (_i = 0; _i <= 3-_skip; ++_i)                 \
 100         X##_f[_i] = X##_f[_i+_skip];                    \
 101     else                                                \
 102       {                                                 \
 103         _s |= X##_f[_i] << _up;                         \
 104         for (_i = 0; _i < 3-_skip; ++_i)                \
 105           X##_f[_i] = X##_f[_i+_skip] >> _down          \
 106                       | X##_f[_i+_skip+1] << _up;       \
 107         X##_f[_i++] = X##_f[3] >> _down;                \
 108       }                                                 \
 109     for (; _i < 4; ++_i)                                \
 110       X##_f[_i] = 0;                                    \
 111     S = (_s != 0);                                      \
 112   } while (0)
 113
 114 #define _FP_FRAC_SRS_4(X,N,size)                \
 115   do {                                          \
 116     int _sticky;                                \
 117     _FP_FRAC_SRST_4(X, _sticky, N, size);       \
 118     X##_f[0] |= _sticky;                        \
 119   } while (0)
 120
 121 #define _FP_FRAC_ADD_4(R,X,Y)                                           \
 122   __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
 123                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
 124                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 125
 126 #define _FP_FRAC_SUB_4(R,X,Y)                                           \
 127   __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
 128                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
 129                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 130
 131 #define _FP_FRAC_DEC_4(X,Y)                                             \
 132   __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
 133                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 134
 135 #define _FP_FRAC_ADDI_4(X,I)                                            \
 136   __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
 137
 138 #define _FP_ZEROFRAC_4  0,0,0,0
 139 #define _FP_MINFRAC_4   0,0,0,1
 140 #define _FP_MAXFRAC_4   (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
 141
 142 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
 143 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
 144 #define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
 145 #define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
 146
 147 #define _FP_FRAC_EQ_4(X,Y)                              \
 148  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
 149   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
 150
 151 #define _FP_FRAC_GT_4(X,Y)                              \
 152  (X##_f[3] > Y##_f[3] ||                                \
 153   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
 154    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
 155     (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
 156    ))                                                   \
 157   ))                                                    \
 158  )
 159
 160 #define _FP_FRAC_GE_4(X,Y)                              \
 161  (X##_f[3] > Y##_f[3] ||                                \
 162   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
 163    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
 164     (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
 165    ))                                                   \
 166   ))                                                    \
 167  )
 168
 169
 170 #define _FP_FRAC_CLZ_4(R,X)             \
 171   do {                                  \
 172     if (X##_f[3])                       \
 173     {                                   \
 174         __FP_CLZ(R,X##_f[3]);           \
 175     }                                   \
 176     else if (X##_f[2])                  \
 177     {                                   \
 178         __FP_CLZ(R,X##_f[2]);           \
 179         R += _FP_W_TYPE_SIZE;           \
 180     }                                   \
 181     else if (X##_f[1])                  \
 182     {                                   \
 183         __FP_CLZ(R,X##_f[1]);           \
 184         R += _FP_W_TYPE_SIZE*2;         \
 185     }                                   \
 186     else                                \
 187     {                                   \
 188         __FP_CLZ(R,X##_f[0]);           \
 189         R += _FP_W_TYPE_SIZE*3;         \
 190     }                                   \
 191   } while(0)
 192
 193
 194 #define _FP_UNPACK_RAW_4(fs, X, val)                            \
 195   do {                                                          \
 196     union _FP_UNION_##fs _flo; _flo.flt = (val);                \
 197     X##_f[0] = _flo.bits.frac0;                                 \
 198     X##_f[1] = _flo.bits.frac1;                                 \
 199     X##_f[2] = _flo.bits.frac2;                                 \
 200     X##_f[3] = _flo.bits.frac3;                                 \
 201     X##_e  = _flo.bits.exp;                                     \
 202     X##_s  = _flo.bits.sign;                                    \
 203   } while (0)
 204
 205 #define _FP_UNPACK_RAW_4_P(fs, X, val)                          \
 206   do {                                                          \
 207     union _FP_UNION_##fs *_flo =                                \
 208       (union _FP_UNION_##fs *)(val);                            \
 209                                                                 \
 210     X##_f[0] = _flo->bits.frac0;                                \
 211     X##_f[1] = _flo->bits.frac1;                                \
 212     X##_f[2] = _flo->bits.frac2;                                \
 213     X##_f[3] = _flo->bits.frac3;                                \
 214     X##_e  = _flo->bits.exp;                                    \
 215     X##_s  = _flo->bits.sign;                                   \
 216   } while (0)
 217
 218 #define _FP_PACK_RAW_4(fs, val, X)                              \
 219   do {                                                          \
 220     union _FP_UNION_##fs _flo;                                  \
 221     _flo.bits.frac0 = X##_f[0];                                 \
 222     _flo.bits.frac1 = X##_f[1];                                 \
 223     _flo.bits.frac2 = X##_f[2];                                 \
 224     _flo.bits.frac3 = X##_f[3];                                 \
 225     _flo.bits.exp   = X##_e;                                    \
 226     _flo.bits.sign  = X##_s;                                    \
 227     (val) = _flo.flt;                                           \
 228   } while (0)
 229
 230 #define _FP_PACK_RAW_4_P(fs, val, X)                            \
 231   do {                                                          \
 232     union _FP_UNION_##fs *_flo =                                \
 233       (union _FP_UNION_##fs *)(val);                            \
 234                                                                 \
 235     _flo->bits.frac0 = X##_f[0];                                \
 236     _flo->bits.frac1 = X##_f[1];                                \
 237     _flo->bits.frac2 = X##_f[2];                                \
 238     _flo->bits.frac3 = X##_f[3];                                \
 239     _flo->bits.exp   = X##_e;                                   \
 240     _flo->bits.sign  = X##_s;                                   \
 241   } while (0)
 242
 243 /*
 244  * Multiplication algorithms:
 245  */
 246
 247 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
 248
 249 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                       \
 250   do {                                                                      \
 251     _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);          \
 252     _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);          \
 253                                                                             \
 254     doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
 255     doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                                 \
 256     doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                                 \
 257     doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                                 \
 258     doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                                 \
 259     doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                                 \
 260     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
 261                     _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,                   \
 262                     0,0,_FP_FRAC_WORD_8(_z,1));                             \
 263     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
 264                     _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,                   \
 265                     _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
 266                     _FP_FRAC_WORD_8(_z,1));                                 \
 267     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
 268                     _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,                   \
 269                     0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));         \
 270     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
 271                     _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,                   \
 272                     _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
 273                     _FP_FRAC_WORD_8(_z,2));                                 \
 274     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
 275                     _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,                   \
 276                     _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
 277                     _FP_FRAC_WORD_8(_z,2));                                 \
 278     doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                                 \
 279     doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                                 \
 280     doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                                 \
 281     doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                                 \
 282     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 283                     _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,                   \
 284                     0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));         \
 285     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 286                     _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,                   \
 287                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 288                     _FP_FRAC_WORD_8(_z,3));                                 \
 289     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 290                     _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,                   \
 291                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 292                     _FP_FRAC_WORD_8(_z,3));                                 \
 293     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 294                     _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,                   \
 295                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
 296                     _FP_FRAC_WORD_8(_z,3));                                 \
 297     doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                                 \
 298     doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                                 \
 299     doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                                 \
 300     doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                                 \
 301     doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                                 \
 302     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
 303                     _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,                   \
 304                     0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));         \
 305     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
 306                     _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,                   \
 307                     _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
 308                     _FP_FRAC_WORD_8(_z,4));                                 \
 309     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
 310                     _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,                   \
 311                     _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
 312                     _FP_FRAC_WORD_8(_z,4));                                 \
 313     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
 314                     _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,                   \
 315                     0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));         \
 316     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
 317                     _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,                   \
 318                     _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
 319                     _FP_FRAC_WORD_8(_z,5));                                 \
 320     doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                                 \
 321     __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
 322                     _b_f1,_b_f0,                                            \
 323                     _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));           \
 324                                                                             \
 325     /* Normalize since we know where the msb of the multiplicands           \
 326        were (bit B), we know that the msb of the of the product is          \
 327        at either 2B or 2B-1.  */                                            \
 328     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
 329     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
 330                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
 331   } while (0)
 332
 333 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                              \
 334   do {                                                                      \
 335     _FP_FRAC_DECL_8(_z);                                                    \
 336                                                                             \
 337     mpn_mul_n(_z_f, _x_f, _y_f, 4);                                         \
 338                                                                             \
 339     /* Normalize since we know where the msb of the multiplicands           \
 340        were (bit B), we know that the msb of the of the product is          \
 341        at either 2B or 2B-1.  */                                            \
 342     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
 343     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
 344                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
 345   } while (0)
 346
 347 /*
 348  * Helper utility for _FP_DIV_MEAT_4_udiv:
 349  * pppp = m * nnn
 350  */
 351 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)                               \
 352   do {                                                                      \
 353     UWtype _t;                                                              \
 354     umul_ppmm(p1,p0,m,n0);                                                  \
 355     umul_ppmm(p2,_t,m,n1);                                                  \
 356     __FP_FRAC_ADDI_2(p2,p1,_t);                                             \
 357     umul_ppmm(p3,_t,m,n2);                                                  \
 358     __FP_FRAC_ADDI_2(p3,p2,_t);                                             \
 359   } while (0)
 360
 361 /*
 362  * Division algorithms:
 363  */
 364
 365 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                    \
 366   do {                                                                      \
 367     int _i;                                                                 \
 368     _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                               \
 369     _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                     \
 370     if (_FP_FRAC_GT_4(X, Y))                                                \
 371       {                                                                     \
 372         _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                        \
 373         _FP_FRAC_SRL_4(X, 1);                                               \
 374       }                                                                     \
 375     else                                                                    \
 376       R##_e--;                                                              \
 377                                                                             \
 378     /* Normalize, i.e. make the most significant bit of the                 \
 379        denominator set. */                                                  \
 380     _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                                 \
 381                                                                             \
 382     for (_i = 3; ; _i--)                                                    \
 383       {                                                                     \
 384         if (X##_f[3] == Y##_f[3])                                           \
 385           {                                                                 \
 386             /* This is a special case, not an optimization                  \
 387                (X##_f[3]/Y##_f[3] would not fit into UWtype).               \
 388                As X## is guaranteed to be < Y,  R##_f[_i] can be either     \
 389                (UWtype)-1 or (UWtype)-2.  */                                \
 390             R##_f[_i] = -1;                                                 \
 391             if (!_i)                                                        \
 392               break;                                                        \
 393             __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],         \
 394                             Y##_f[2], Y##_f[1], Y##_f[0], 0,                \
 395                             X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);        \
 396             _FP_FRAC_SUB_4(X, Y, X);                                        \
 397             if (X##_f[3] > Y##_f[3])                                        \
 398               {                                                             \
 399                 R##_f[_i] = -2;                                             \
 400                 _FP_FRAC_ADD_4(X, Y, X);                                    \
 401               }                                                             \
 402           }                                                                 \
 403         else                                                                \
 404           {                                                                 \
 405             udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
 406             umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],               \
 407                           R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);         \
 408             X##_f[2] = X##_f[1];                                            \
 409             X##_f[1] = X##_f[0];                                            \
 410             X##_f[0] = _n_f[_i];                                            \
 411             if (_FP_FRAC_GT_4(_m, X))                                       \
 412               {                                                             \
 413                 R##_f[_i]--;                                                \
 414                 _FP_FRAC_ADD_4(X, Y, X);                                    \
 415                 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))            \
 416                   {                                                         \
 417                     R##_f[_i]--;                                            \
 418                     _FP_FRAC_ADD_4(X, Y, X);                                \
 419                   }                                                         \
 420               }                                                             \
 421             _FP_FRAC_DEC_4(X, _m);                                          \
 422             if (!_i)                                                        \
 423               {                                                             \
 424                 if (!_FP_FRAC_EQ_4(X, _m))                                  \
 425                   R##_f[0] |= _FP_WORK_STICKY;                              \
 426                 break;                                                      \
 427               }                                                             \
 428           }                                                                 \
 429       }                                                                     \
 430   } while (0)
 431
 432
 433 /*
 434  * Square root algorithms:
 435  * We have just one right now, maybe Newton approximation
 436  * should be added for those machines where division is fast.
 437  */
 438
 439 #define _FP_SQRT_MEAT_4(R, S, T, X, q)                          \
 440   do {                                                          \
 441     while (q)                                                   \
 442       {                                                         \
 443         T##_f[3] = S##_f[3] + q;                                \
 444         if (T##_f[3] <= X##_f[3])                               \
 445           {                                                     \
 446             S##_f[3] = T##_f[3] + q;                            \
 447             X##_f[3] -= T##_f[3];                               \
 448             R##_f[3] += q;                                      \
 449           }                                                     \
 450         _FP_FRAC_SLL_4(X, 1);                                   \
 451         q >>= 1;                                                \
 452       }                                                         \
 453     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
 454     while (q)                                                   \
 455       {                                                         \
 456         T##_f[2] = S##_f[2] + q;                                \
 457         T##_f[3] = S##_f[3];                                    \
 458         if (T##_f[3] < X##_f[3] ||                              \
 459             (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
 460           {                                                     \
 461             S##_f[2] = T##_f[2] + q;                            \
 462             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
 463             __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
 464                             T##_f[3], T##_f[2]);                \
 465             R##_f[2] += q;                                      \
 466           }                                                     \
 467         _FP_FRAC_SLL_4(X, 1);                                   \
 468         q >>= 1;                                                \
 469       }                                                         \
 470     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
 471     while (q)                                                   \
 472       {                                                         \
 473         T##_f[1] = S##_f[1] + q;                                \
 474         T##_f[2] = S##_f[2];                                    \
 475         T##_f[3] = S##_f[3];                                    \
 476         if (T##_f[3] < X##_f[3] ||                              \
 477             (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
 478              (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
 479           {                                                     \
 480             S##_f[1] = T##_f[1] + q;                            \
 481             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
 482             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
 483             __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
 484                             T##_f[3], T##_f[2], T##_f[1]);      \
 485             R##_f[1] += q;                                      \
 486           }                                                     \
 487         _FP_FRAC_SLL_4(X, 1);                                   \
 488         q >>= 1;                                                \
 489       }                                                         \
 490     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
 491     while (q != _FP_WORK_ROUND)                                 \
 492       {                                                         \
 493         T##_f[0] = S##_f[0] + q;                                \
 494         T##_f[1] = S##_f[1];                                    \
 495         T##_f[2] = S##_f[2];                                    \
 496         T##_f[3] = S##_f[3];                                    \
 497         if (_FP_FRAC_GE_4(X,T))                                 \
 498           {                                                     \
 499             S##_f[0] = T##_f[0] + q;                            \
 500             S##_f[1] += (T##_f[0] > S##_f[0]);                  \
 501             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
 502             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
 503             _FP_FRAC_DEC_4(X, T);                               \
 504             R##_f[0] += q;                                      \
 505           }                                                     \
 506         _FP_FRAC_SLL_4(X, 1);                                   \
 507         q >>= 1;                                                \
 508       }                                                         \
 509     if (!_FP_FRAC_ZEROP_4(X))                                   \
 510       {                                                         \
 511         if (_FP_FRAC_GT_4(X,S))                                 \
 512           R##_f[0] |= _FP_WORK_ROUND;                           \
 513         R##_f[0] |= _FP_WORK_STICKY;                            \
 514       }                                                         \
 515   } while (0)
 516
 517
 518 /*
 519  * Internals
 520  */
 521
 522 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
 523   (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
 524
 525 #ifndef __FP_FRAC_ADD_3
 526 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
 527   do {                                                          \
 528     _FP_W_TYPE _c1, _c2;                                        \
 529     r0 = x0 + y0;                                               \
 530     _c1 = r0 < x0;                                              \
 531     r1 = x1 + y1;                                               \
 532     _c2 = r1 < x1;                                              \
 533     r1 += _c1;                                                  \
 534     _c2 |= r1 < _c1;                                            \
 535     r2 = x2 + y2 + _c2;                                         \
 536   } while (0)
 537 #endif
 538
 539 #ifndef __FP_FRAC_ADD_4
 540 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
 541   do {                                                          \
 542     _FP_W_TYPE _c1, _c2, _c3;                                   \
 543     r0 = x0 + y0;                                               \
 544     _c1 = r0 < x0;                                              \
 545     r1 = x1 + y1;                                               \
 546     _c2 = r1 < x1;                                              \
 547     r1 += _c1;                                                  \
 548     _c2 |= r1 < _c1;                                            \
 549     r2 = x2 + y2;                                               \
 550     _c3 = r2 < x2;                                              \
 551     r2 += _c2;                                                  \
 552     _c3 |= r2 < _c2;                                            \
 553     r3 = x3 + y3 + _c3;                                         \
 554   } while (0)
 555 #endif
 556
 557 #ifndef __FP_FRAC_SUB_3
 558 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
 559   do {                                                          \
 560     _FP_W_TYPE _c1, _c2;                                        \
 561     r0 = x0 - y0;                                               \
 562     _c1 = r0 > x0;                                              \
 563     r1 = x1 - y1;                                               \
 564     _c2 = r1 > x1;                                              \
 565     r1 -= _c1;                                                  \
 566     _c2 |= _c1 && (y1 == x1);                                   \
 567     r2 = x2 - y2 - _c2;                                         \
 568   } while (0)
 569 #endif
 570
 571 #ifndef __FP_FRAC_SUB_4
 572 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
 573   do {                                                          \
 574     _FP_W_TYPE _c1, _c2, _c3;                                   \
 575     r0 = x0 - y0;                                               \
 576     _c1 = r0 > x0;                                              \
 577     r1 = x1 - y1;                                               \
 578     _c2 = r1 > x1;                                              \
 579     r1 -= _c1;                                                  \
 580     _c2 |= _c1 && (y1 == x1);                                   \
 581     r2 = x2 - y2;                                               \
 582     _c3 = r2 > x2;                                              \
 583     r2 -= _c2;                                                  \
 584     _c3 |= _c2 && (y2 == x2);                                   \
 585     r3 = x3 - y3 - _c3;                                         \
 586   } while (0)
 587 #endif
 588
 589 #ifndef __FP_FRAC_DEC_3
 590 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                              \
 591   do {                                                                  \
 592     UWtype _t0, _t1, _t2;                                               \
 593     _t0 = x0, _t1 = x1, _t2 = x2;                                       \
 594     __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);            \
 595   } while (0)
 596 #endif
 597
 598 #ifndef __FP_FRAC_DEC_4
 599 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)                        \
 600   do {                                                                  \
 601     UWtype _t0, _t1, _t2, _t3;                                          \
 602     _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                             \
 603     __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);         \
 604   } while (0)
 605 #endif
 606
 607 #ifndef __FP_FRAC_ADDI_4
 608 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
 609   do {                                                                  \
 610     UWtype _t;                                                          \
 611     _t = ((x0 += i) < i);                                               \
 612     x1 += _t; _t = (x1 < _t);                                           \
 613     x2 += _t; _t = (x2 < _t);                                           \
 614     x3 += _t;                                                           \
 615   } while (0)
 616 #endif
 617
 618 /* Convert FP values between word sizes. This appears to be more
 619  * complicated than I'd have expected it to be, so these might be
 620  * wrong... These macros are in any case somewhat bogus because they
 621  * use information about what various FRAC_n variables look like
 622  * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
 623  * the ones in op-2.h and op-1.h.
 624  */
 625 #define _FP_FRAC_COPY_1_4(D, S)         (D##_f = S##_f[0])
 626
 627 #define _FP_FRAC_COPY_2_4(D, S)                 \
 628 do {                                            \
 629   D##_f0 = S##_f[0];                            \
 630   D##_f1 = S##_f[1];                            \
 631 } while (0)
 632
 633 /* Assembly/disassembly for converting to/from integral types.
 634  * No shifting or overflow handled here.
 635  */
 636 /* Put the FP value X into r, which is an integer of size rsize. */
 637 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
 638   do {                                                                  \
 639     if (rsize <= _FP_W_TYPE_SIZE)                                       \
 640       r = X##_f[0];                                                     \
 641     else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
 642     {                                                                   \
 643       r = X##_f[1];                                                     \
 644       r <<= _FP_W_TYPE_SIZE;                                            \
 645       r += X##_f[0];                                                    \
 646     }                                                                   \
 647     else                                                                \
 648     {                                                                   \
 649       /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
 650       /* and int == 4words as a single case.                     */     \
 651       r = X##_f[3];                                                     \
 652       r <<= _FP_W_TYPE_SIZE;                                            \
 653       r += X##_f[2];                                                    \
 654       r <<= _FP_W_TYPE_SIZE;                                            \
 655       r += X##_f[1];                                                    \
 656       r <<= _FP_W_TYPE_SIZE;                                            \
 657       r += X##_f[0];                                                    \
 658     }                                                                   \
 659   } while (0)
 660
 661 /* "No disassemble Number Five!" */
 662 /* move an integer of size rsize into X's fractional part. We rely on
 663  * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
 664  * having to mask the values we store into it.
 665  */
 666 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
 667   do {                                                                  \
 668     X##_f[0] = r;                                                       \
 669     X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
 670     X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
 671     X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
 672   } while (0);
 673
 674 #define _FP_FRAC_COPY_4_1(D, S)                 \
 675 do {                                            \
 676   D##_f[0] = S##_f;                             \
 677   D##_f[1] = D##_f[2] = D##_f[3] = 0;           \
 678 } while (0)
 679
 680 #define _FP_FRAC_COPY_4_2(D, S)                 \
 681 do {                                            \
 682   D##_f[0] = S##_f0;                            \
 683   D##_f[1] = S##_f1;                            \
 684   D##_f[2] = D##_f[3] = 0;                      \
 685 } while (0)
 686
 687 #define _FP_FRAC_COPY_4_4(D,S)  _FP_FRAC_COPY_4(D,S)