soft-fp: fix floating-point to integer unsigned saturation.
[platform/upstream/glibc.git] / soft-fp / op-2.h
1 /* Software floating-point emulation.
2    Basic two-word fraction declaration and manipulation.
3    Copyright (C) 1997-2013 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6                   Jakub Jelinek (jj@ultra.linux.cz),
7                   David S. Miller (davem@redhat.com) and
8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32
33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)
36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
38 #define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
39
40 #define _FP_FRAC_SLL_2(X,N)                                                 \
41 (void)(((N) < _FP_W_TYPE_SIZE)                                              \
42        ? ({                                                                 \
43             if (__builtin_constant_p(N) && (N) == 1)                        \
44               {                                                             \
45                 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
46                 X##_f0 += X##_f0;                                           \
47               }                                                             \
48             else                                                            \
49               {                                                             \
50                 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51                 X##_f0 <<= (N);                                             \
52               }                                                             \
53             0;                                                              \
54           })                                                                \
55        : ({                                                                 \
56             X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
57             X##_f0 = 0;                                                     \
58           }))
59
60
61 #define _FP_FRAC_SRL_2(X,N)                                             \
62 (void)(((N) < _FP_W_TYPE_SIZE)                                          \
63        ? ({                                                             \
64             X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65             X##_f1 >>= (N);                                             \
66           })                                                            \
67        : ({                                                             \
68             X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                 \
69             X##_f1 = 0;                                                 \
70           }))
71
72 /* Right shift with sticky-lsb.  */
73 #define _FP_FRAC_SRST_2(X,S, N,sz)                                        \
74 (void)(((N) < _FP_W_TYPE_SIZE)                                            \
75        ? ({                                                               \
76             S = (__builtin_constant_p(N) && (N) == 1                      \
77                  ? X##_f0 & 1                                             \
78                  : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);             \
79             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80             X##_f1 >>= (N);                                               \
81           })                                                              \
82        : ({                                                               \
83             S = ((((N) == _FP_W_TYPE_SIZE                                 \
84                    ? 0                                                    \
85                    : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))               \
86                   | X##_f0) != 0);                                        \
87             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));                 \
88             X##_f1 = 0;                                                   \
89           }))
90
91 #define _FP_FRAC_SRS_2(X,N,sz)                                            \
92 (void)(((N) < _FP_W_TYPE_SIZE)                                            \
93        ? ({                                                               \
94             X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
95                       (__builtin_constant_p(N) && (N) == 1                \
96                        ? X##_f0 & 1                                       \
97                        : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));      \
98             X##_f1 >>= (N);                                               \
99           })                                                              \
100        : ({                                                               \
101             X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                 \
102                       ((((N) == _FP_W_TYPE_SIZE                           \
103                          ? 0                                              \
104                          : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))         \
105                         | X##_f0) != 0));                                 \
106             X##_f1 = 0;                                                   \
107           }))
108
109 #define _FP_FRAC_ADDI_2(X,I)    \
110   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
111
112 #define _FP_FRAC_ADD_2(R,X,Y)   \
113   __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114
115 #define _FP_FRAC_SUB_2(R,X,Y)   \
116   __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117
118 #define _FP_FRAC_DEC_2(X,Y)     \
119   __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
120
121 #define _FP_FRAC_CLZ_2(R,X)     \
122   do {                          \
123     if (X##_f1)                 \
124       __FP_CLZ(R,X##_f1);       \
125     else                        \
126     {                           \
127       __FP_CLZ(R,X##_f0);       \
128       R += _FP_W_TYPE_SIZE;     \
129     }                           \
130   } while(0)
131
132 /* Predicates */
133 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)
134 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
135 #define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
136 #define _FP_FRAC_CLEAR_OVERP_2(fs,X)    (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
137 #define _FP_FRAC_HIGHBIT_DW_2(fs,X)     \
138   (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
139 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
140 #define _FP_FRAC_GT_2(X, Y)     \
141   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
142 #define _FP_FRAC_GE_2(X, Y)     \
143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
144
145 #define _FP_ZEROFRAC_2          0, 0
146 #define _FP_MINFRAC_2           0, 1
147 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
148
149 /*
150  * Internals
151  */
152
153 #define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)
154
155 #define __FP_CLZ_2(R, xh, xl)   \
156   do {                          \
157     if (xh)                     \
158       __FP_CLZ(R,xh);           \
159     else                        \
160     {                           \
161       __FP_CLZ(R,xl);           \
162       R += _FP_W_TYPE_SIZE;     \
163     }                           \
164   } while(0)
165
166 #if 0
167
168 #ifndef __FP_FRAC_ADDI_2
169 #define __FP_FRAC_ADDI_2(xh, xl, i)     \
170   (xh += ((xl += i) < i))
171 #endif
172 #ifndef __FP_FRAC_ADD_2
173 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
174   (rh = xh + yh + ((rl = xl + yl) < xl))
175 #endif
176 #ifndef __FP_FRAC_SUB_2
177 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
178   (rh = xh - yh - ((rl = xl - yl) > xl))
179 #endif
180 #ifndef __FP_FRAC_DEC_2
181 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
182   do {                                  \
183     UWtype _t = xl;                     \
184     xh -= yh + ((xl -= yl) > _t);       \
185   } while (0)
186 #endif
187
188 #else
189
190 #undef __FP_FRAC_ADDI_2
191 #define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
192 #undef __FP_FRAC_ADD_2
193 #define __FP_FRAC_ADD_2                 add_ssaaaa
194 #undef __FP_FRAC_SUB_2
195 #define __FP_FRAC_SUB_2                 sub_ddmmss
196 #undef __FP_FRAC_DEC_2
197 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
198
199 #endif
200
201 /*
202  * Unpack the raw bits of a native fp value.  Do not classify or
203  * normalize the data.
204  */
205
206 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
207   do {                                                  \
208     union _FP_UNION_##fs _flo; _flo.flt = (val);        \
209                                                         \
210     X##_f0 = _flo.bits.frac0;                           \
211     X##_f1 = _flo.bits.frac1;                           \
212     X##_e  = _flo.bits.exp;                             \
213     X##_s  = _flo.bits.sign;                            \
214   } while (0)
215
216 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
217   do {                                                  \
218     union _FP_UNION_##fs *_flo =                        \
219       (union _FP_UNION_##fs *)(val);                    \
220                                                         \
221     X##_f0 = _flo->bits.frac0;                          \
222     X##_f1 = _flo->bits.frac1;                          \
223     X##_e  = _flo->bits.exp;                            \
224     X##_s  = _flo->bits.sign;                           \
225   } while (0)
226
227
228 /*
229  * Repack the raw bits of a native fp value.
230  */
231
232 #define _FP_PACK_RAW_2(fs, val, X)                      \
233   do {                                                  \
234     union _FP_UNION_##fs _flo;                          \
235                                                         \
236     _flo.bits.frac0 = X##_f0;                           \
237     _flo.bits.frac1 = X##_f1;                           \
238     _flo.bits.exp   = X##_e;                            \
239     _flo.bits.sign  = X##_s;                            \
240                                                         \
241     (val) = _flo.flt;                                   \
242   } while (0)
243
244 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
245   do {                                                  \
246     union _FP_UNION_##fs *_flo =                        \
247       (union _FP_UNION_##fs *)(val);                    \
248                                                         \
249     _flo->bits.frac0 = X##_f0;                          \
250     _flo->bits.frac1 = X##_f1;                          \
251     _flo->bits.exp   = X##_e;                           \
252     _flo->bits.sign  = X##_s;                           \
253   } while (0)
254
255
256 /*
257  * Multiplication algorithms:
258  */
259
260 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
261
262 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
263   do {                                                                  \
264     _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);                           \
265                                                                         \
266     doit(_FP_FRAC_WORD_4(R,1), _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0);   \
267     doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
268     doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
269     doit(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2), X##_f1, Y##_f1);   \
270                                                                         \
271     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
272                     _FP_FRAC_WORD_4(R,1), 0, _b_f1, _b_f0,              \
273                     _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
274                     _FP_FRAC_WORD_4(R,1));                              \
275     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
276                     _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0,              \
277                     _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
278                     _FP_FRAC_WORD_4(R,1));                              \
279   } while (0)
280
281 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
282   do {                                                                  \
283     _FP_FRAC_DECL_4(_z);                                                \
284                                                                         \
285     _FP_MUL_MEAT_DW_2_wide(wfracbits, _z, X, Y, doit);                  \
286                                                                         \
287     /* Normalize since we know where the msb of the multiplicands       \
288        were (bit B), we know that the msb of the of the product is      \
289        at either 2B or 2B-1.  */                                        \
290     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
291     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
292     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
293   } while (0)
294
295 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
296    Do only 3 multiplications instead of four. This one is for machines
297    where multiplication is much more expensive than subtraction.  */
298
299 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
300   do {                                                                  \
301     _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);                           \
302     _FP_W_TYPE _d;                                                      \
303     int _c1, _c2;                                                       \
304                                                                         \
305     _b_f0 = X##_f0 + X##_f1;                                            \
306     _c1 = _b_f0 < X##_f0;                                               \
307     _b_f1 = Y##_f0 + Y##_f1;                                            \
308     _c2 = _b_f1 < Y##_f0;                                               \
309     doit(_d, _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0);                     \
310     doit(_FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1), _b_f0, _b_f1);     \
311     doit(_c_f1, _c_f0, X##_f1, Y##_f1);                                 \
312                                                                         \
313     _b_f0 &= -_c2;                                                      \
314     _b_f1 &= -_c1;                                                      \
315     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
316                     _FP_FRAC_WORD_4(R,1), (_c1 & _c2), 0, _d,           \
317                     0, _FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1));     \
318     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
319                      _b_f0);                                            \
320     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),         \
321                      _b_f1);                                            \
322     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
323                     _FP_FRAC_WORD_4(R,1),                               \
324                     0, _d, _FP_FRAC_WORD_4(R,0));                       \
325     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2),          \
326                     _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0);             \
327     __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2),         \
328                     _c_f1, _c_f0,                                       \
329                     _FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2));        \
330   } while (0)
331
332 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
333   do {                                                                  \
334     _FP_FRAC_DECL_4(_z);                                                \
335                                                                         \
336     _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, _z, X, Y, doit);             \
337                                                                         \
338     /* Normalize since we know where the msb of the multiplicands       \
339        were (bit B), we know that the msb of the of the product is      \
340        at either 2B or 2B-1.  */                                        \
341     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
342     R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
343     R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
344   } while (0)
345
346 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)                       \
347   do {                                                                  \
348     _FP_W_TYPE _x[2], _y[2];                                            \
349     _x[0] = X##_f0; _x[1] = X##_f1;                                     \
350     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
351                                                                         \
352     mpn_mul_n(R##_f, _x, _y, 2);                                        \
353   } while (0)
354
355 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
356   do {                                                                  \
357     _FP_FRAC_DECL_4(_z);                                                \
358                                                                         \
359     _FP_MUL_MEAT_DW_2_gmp(wfracbits, _z, X, Y);                         \
360                                                                         \
361     /* Normalize since we know where the msb of the multiplicands       \
362        were (bit B), we know that the msb of the of the product is      \
363        at either 2B or 2B-1.  */                                        \
364     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);                       \
365     R##_f0 = _z_f[0];                                                   \
366     R##_f1 = _z_f[1];                                                   \
367   } while (0)
368
369 /* Do at most 120x120=240 bits multiplication using double floating
370    point multiplication.  This is useful if floating point
371    multiplication has much bigger throughput than integer multiply.
372    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
373    between 106 and 120 only.
374    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
375    SETFETZ is a macro which will disable all FPU exceptions and set rounding
376    towards zero,  RESETFE should optionally reset it back.  */
377
378 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)     \
379   do {                                                                          \
380     static const double _const[] = {                                            \
381       /* 2^-24 */ 5.9604644775390625e-08,                                       \
382       /* 2^-48 */ 3.5527136788005009e-15,                                       \
383       /* 2^-72 */ 2.1175823681357508e-22,                                       \
384       /* 2^-96 */ 1.2621774483536189e-29,                                       \
385       /* 2^28 */ 2.68435456e+08,                                                \
386       /* 2^4 */ 1.600000e+01,                                                   \
387       /* 2^-20 */ 9.5367431640625e-07,                                          \
388       /* 2^-44 */ 5.6843418860808015e-14,                                       \
389       /* 2^-68 */ 3.3881317890172014e-21,                                       \
390       /* 2^-92 */ 2.0194839173657902e-28,                                       \
391       /* 2^-116 */ 1.2037062152420224e-35};                                     \
392     double _a240, _b240, _c240, _d240, _e240, _f240,                            \
393            _g240, _h240, _i240, _j240, _k240;                                   \
394     union { double d; UDItype i; } _l240, _m240, _n240, _o240,                  \
395                                    _p240, _q240, _r240, _s240;                  \
396     UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;                       \
397                                                                                 \
398     if (wfracbits < 106 || wfracbits > 120)                                     \
399       abort();                                                                  \
400                                                                                 \
401     setfetz;                                                                    \
402                                                                                 \
403     _e240 = (double)(long)(X##_f0 & 0xffffff);                                  \
404     _j240 = (double)(long)(Y##_f0 & 0xffffff);                                  \
405     _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);                          \
406     _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);                          \
407     _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));       \
408     _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));       \
409     _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);                           \
410     _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);                           \
411     _a240 = (double)(long)(X##_f1 >> 32);                                       \
412     _f240 = (double)(long)(Y##_f1 >> 32);                                       \
413     _e240 *= _const[3];                                                         \
414     _j240 *= _const[3];                                                         \
415     _d240 *= _const[2];                                                         \
416     _i240 *= _const[2];                                                         \
417     _c240 *= _const[1];                                                         \
418     _h240 *= _const[1];                                                         \
419     _b240 *= _const[0];                                                         \
420     _g240 *= _const[0];                                                         \
421     _s240.d =                                                         _e240*_j240;\
422     _r240.d =                                           _d240*_j240 + _e240*_i240;\
423     _q240.d =                             _c240*_j240 + _d240*_i240 + _e240*_h240;\
424     _p240.d =               _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
425     _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
426     _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;            \
427     _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                          \
428     _l240.d = _a240*_g240 + _b240*_f240;                                        \
429     _k240 =   _a240*_f240;                                                      \
430     _r240.d += _s240.d;                                                         \
431     _q240.d += _r240.d;                                                         \
432     _p240.d += _q240.d;                                                         \
433     _o240.d += _p240.d;                                                         \
434     _n240.d += _o240.d;                                                         \
435     _m240.d += _n240.d;                                                         \
436     _l240.d += _m240.d;                                                         \
437     _k240 += _l240.d;                                                           \
438     _s240.d -= ((_const[10]+_s240.d)-_const[10]);                               \
439     _r240.d -= ((_const[9]+_r240.d)-_const[9]);                                 \
440     _q240.d -= ((_const[8]+_q240.d)-_const[8]);                                 \
441     _p240.d -= ((_const[7]+_p240.d)-_const[7]);                                 \
442     _o240.d += _const[7];                                                       \
443     _n240.d += _const[6];                                                       \
444     _m240.d += _const[5];                                                       \
445     _l240.d += _const[4];                                                       \
446     if (_s240.d != 0.0) _y240 = 1;                                              \
447     if (_r240.d != 0.0) _y240 = 1;                                              \
448     if (_q240.d != 0.0) _y240 = 1;                                              \
449     if (_p240.d != 0.0) _y240 = 1;                                              \
450     _t240 = (DItype)_k240;                                                      \
451     _u240 = _l240.i;                                                            \
452     _v240 = _m240.i;                                                            \
453     _w240 = _n240.i;                                                            \
454     _x240 = _o240.i;                                                            \
455     R##_f1 = (_t240 << (128 - (wfracbits - 1)))                                 \
456              | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));                 \
457     R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))                    \
458              | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))                  \
459              | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))                  \
460              | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))                   \
461              | _y240;                                                           \
462     resetfe;                                                                    \
463   } while (0)
464
465 /*
466  * Division algorithms:
467  */
468
469 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
470   do {                                                                  \
471     _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;         \
472     if (_FP_FRAC_GT_2(X, Y))                                            \
473       {                                                                 \
474         _n_f2 = X##_f1 >> 1;                                            \
475         _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
476         _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
477       }                                                                 \
478     else                                                                \
479       {                                                                 \
480         R##_e--;                                                        \
481         _n_f2 = X##_f1;                                                 \
482         _n_f1 = X##_f0;                                                 \
483         _n_f0 = 0;                                                      \
484       }                                                                 \
485                                                                         \
486     /* Normalize, i.e. make the most significant bit of the             \
487        denominator set. */                                              \
488     _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);                             \
489                                                                         \
490     udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                    \
491     umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                            \
492     _r_f0 = _n_f0;                                                      \
493     if (_FP_FRAC_GT_2(_m, _r))                                          \
494       {                                                                 \
495         R##_f1--;                                                       \
496         _FP_FRAC_ADD_2(_r, Y, _r);                                      \
497         if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
498           {                                                             \
499             R##_f1--;                                                   \
500             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
501           }                                                             \
502       }                                                                 \
503     _FP_FRAC_DEC_2(_r, _m);                                             \
504                                                                         \
505     if (_r_f1 == Y##_f1)                                                \
506       {                                                                 \
507         /* This is a special case, not an optimization                  \
508            (_r/Y##_f1 would not fit into UWtype).                       \
509            As _r is guaranteed to be < Y,  R##_f0 can be either         \
510            (UWtype)-1 or (UWtype)-2.  But as we know what kind          \
511            of bits it is (sticky, guard, round),  we don't care.        \
512            We also don't care what the reminder is,  because the        \
513            guard bit will be set anyway.  -jj */                        \
514         R##_f0 = -1;                                                    \
515       }                                                                 \
516     else                                                                \
517       {                                                                 \
518         udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);                \
519         umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);                        \
520         _r_f0 = 0;                                                      \
521         if (_FP_FRAC_GT_2(_m, _r))                                      \
522           {                                                             \
523             R##_f0--;                                                   \
524             _FP_FRAC_ADD_2(_r, Y, _r);                                  \
525             if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))          \
526               {                                                         \
527                 R##_f0--;                                               \
528                 _FP_FRAC_ADD_2(_r, Y, _r);                              \
529               }                                                         \
530           }                                                             \
531         if (!_FP_FRAC_EQ_2(_r, _m))                                     \
532           R##_f0 |= _FP_WORK_STICKY;                                    \
533       }                                                                 \
534   } while (0)
535
536
537 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
538   do {                                                                  \
539     _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
540     _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
541     _x[0] = _x[3] = 0;                                                  \
542     if (_FP_FRAC_GT_2(X, Y))                                            \
543       {                                                                 \
544         R##_e++;                                                        \
545         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
546                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
547                             (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
548         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
549       }                                                                 \
550     else                                                                \
551       {                                                                 \
552         _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
553                  X##_f1 >> (_FP_W_TYPE_SIZE -                           \
554                             (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
555         _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
556       }                                                                 \
557                                                                         \
558     (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                            \
559     R##_f1 = _z[1];                                                     \
560     R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
561   } while (0)
562
563
564 /*
565  * Square root algorithms:
566  * We have just one right now, maybe Newton approximation
567  * should be added for those machines where division is fast.
568  */
569
570 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
571   do {                                                  \
572     while (q)                                           \
573       {                                                 \
574         T##_f1 = S##_f1 + q;                            \
575         if (T##_f1 <= X##_f1)                           \
576           {                                             \
577             S##_f1 = T##_f1 + q;                        \
578             X##_f1 -= T##_f1;                           \
579             R##_f1 += q;                                \
580           }                                             \
581         _FP_FRAC_SLL_2(X, 1);                           \
582         q >>= 1;                                        \
583       }                                                 \
584     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
585     while (q != _FP_WORK_ROUND)                         \
586       {                                                 \
587         T##_f0 = S##_f0 + q;                            \
588         T##_f1 = S##_f1;                                \
589         if (T##_f1 < X##_f1 ||                          \
590             (T##_f1 == X##_f1 && T##_f0 <= X##_f0))     \
591           {                                             \
592             S##_f0 = T##_f0 + q;                        \
593             S##_f1 += (T##_f0 > S##_f0);                \
594             _FP_FRAC_DEC_2(X, T);                       \
595             R##_f0 += q;                                \
596           }                                             \
597         _FP_FRAC_SLL_2(X, 1);                           \
598         q >>= 1;                                        \
599       }                                                 \
600     if (X##_f0 | X##_f1)                                \
601       {                                                 \
602         if (S##_f1 < X##_f1 ||                          \
603             (S##_f1 == X##_f1 && S##_f0 < X##_f0))      \
604           R##_f0 |= _FP_WORK_ROUND;                     \
605         R##_f0 |= _FP_WORK_STICKY;                      \
606       }                                                 \
607   } while (0)
608
609
610 /*
611  * Assembly/disassembly for converting to/from integral types.
612  * No shifting or overflow handled here.
613  */
614
615 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
616 (void)((rsize <= _FP_W_TYPE_SIZE)               \
617        ? ({ r = X##_f0; })                      \
618        : ({                                     \
619             r = X##_f1;                         \
620             r <<= _FP_W_TYPE_SIZE;              \
621             r += X##_f0;                        \
622           }))
623
624 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
625   do {                                                                  \
626     X##_f0 = r;                                                         \
627     X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);     \
628   } while (0)
629
630 /*
631  * Convert FP values between word sizes
632  */
633
634 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
635
636 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
637
638 #define _FP_FRAC_COPY_2_2(D,S)          _FP_FRAC_COPY_2(D,S)