160990fe4e8fe1586c76e2c00c7bce2f1eec0f38
[platform/upstream/glibc.git] / soft-fp / op-2.h
1 /* Software floating-point emulation.
2    Basic two-word fraction declaration and manipulation.
3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6                   Jakub Jelinek (jj@ultra.linux.cz),
7                   David S. Miller (davem@redhat.com) and
8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32
33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D, S)   (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)
36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
38 #define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
39
40 #define _FP_FRAC_SLL_2(X, N)                                            \
41   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
42           ? ({                                                          \
43               if (__builtin_constant_p (N) && (N) == 1)                 \
44                 {                                                       \
45                   X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
46                   X##_f0 += X##_f0;                                     \
47                 }                                                       \
48               else                                                      \
49                 {                                                       \
50                   X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51                   X##_f0 <<= (N);                                       \
52                 }                                                       \
53               0;                                                        \
54             })                                                          \
55           : ({                                                          \
56               X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
57               X##_f0 = 0;                                               \
58             }))
59
60
61 #define _FP_FRAC_SRL_2(X, N)                                            \
62   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
63           ? ({                                                          \
64               X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65               X##_f1 >>= (N);                                           \
66             })                                                          \
67           : ({                                                          \
68               X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
69               X##_f1 = 0;                                               \
70             }))
71
72 /* Right shift with sticky-lsb.  */
73 #define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
74   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
75           ? ({                                                          \
76               S = (__builtin_constant_p (N) && (N) == 1                 \
77                    ? X##_f0 & 1                                         \
78                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);         \
79               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80               X##_f1 >>= (N);                                           \
81             })                                                          \
82           : ({                                                          \
83               S = ((((N) == _FP_W_TYPE_SIZE                             \
84                      ? 0                                                \
85                      : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
86                     | X##_f0) != 0);                                    \
87               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
88               X##_f1 = 0;                                               \
89             }))
90
91 #define _FP_FRAC_SRS_2(X, N, sz)                                        \
92   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
93           ? ({                                                          \
94               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
95                         | (__builtin_constant_p (N) && (N) == 1         \
96                            ? X##_f0 & 1                                 \
97                            : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98               X##_f1 >>= (N);                                           \
99             })                                                          \
100           : ({                                                          \
101               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
102                         | ((((N) == _FP_W_TYPE_SIZE                     \
103                              ? 0                                        \
104                              : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
105                             | X##_f0) != 0));                           \
106               X##_f1 = 0;                                               \
107             }))
108
109 #define _FP_FRAC_ADDI_2(X, I)   \
110   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
111
112 #define _FP_FRAC_ADD_2(R, X, Y) \
113   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114
115 #define _FP_FRAC_SUB_2(R, X, Y) \
116   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117
118 #define _FP_FRAC_DEC_2(X, Y)    \
119   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
120
121 #define _FP_FRAC_CLZ_2(R, X)                    \
122   do                                            \
123     {                                           \
124       if (X##_f1)                               \
125         __FP_CLZ (R, X##_f1);                   \
126       else                                      \
127         {                                       \
128           __FP_CLZ (R, X##_f0);                 \
129           R += _FP_W_TYPE_SIZE;                 \
130         }                                       \
131     }                                           \
132   while (0)
133
134 /* Predicates */
135 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
136 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
140   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142 #define _FP_FRAC_GT_2(X, Y)     \
143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144 #define _FP_FRAC_GE_2(X, Y)     \
145   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
146
147 #define _FP_ZEROFRAC_2          0, 0
148 #define _FP_MINFRAC_2           0, 1
149 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
150
151 /*
152  * Internals
153  */
154
155 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
156
157 #define __FP_CLZ_2(R, xh, xl)                   \
158   do                                            \
159     {                                           \
160       if (xh)                                   \
161         __FP_CLZ (R, xh);                       \
162       else                                      \
163         {                                       \
164           __FP_CLZ (R, xl);                     \
165           R += _FP_W_TYPE_SIZE;                 \
166         }                                       \
167     }                                           \
168   while (0)
169
170 #if 0
171
172 # ifndef __FP_FRAC_ADDI_2
173 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
174   (xh += ((xl += i) < i))
175 # endif
176 # ifndef __FP_FRAC_ADD_2
177 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)       \
178   (rh = xh + yh + ((rl = xl + yl) < xl))
179 # endif
180 # ifndef __FP_FRAC_SUB_2
181 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)       \
182   (rh = xh - yh - ((rl = xl - yl) > xl))
183 # endif
184 # ifndef __FP_FRAC_DEC_2
185 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)       \
186   do                                            \
187     {                                           \
188       UWtype _t = xl;                           \
189       xh -= yh + ((xl -= yl) > _t);             \
190     }                                           \
191   while (0)
192 # endif
193
194 #else
195
196 # undef __FP_FRAC_ADDI_2
197 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
198 # undef __FP_FRAC_ADD_2
199 # define __FP_FRAC_ADD_2                add_ssaaaa
200 # undef __FP_FRAC_SUB_2
201 # define __FP_FRAC_SUB_2                sub_ddmmss
202 # undef __FP_FRAC_DEC_2
203 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)        \
204   sub_ddmmss (xh, xl, xh, xl, yh, yl)
205
206 #endif
207
208 /*
209  * Unpack the raw bits of a native fp value.  Do not classify or
210  * normalize the data.
211  */
212
213 #define _FP_UNPACK_RAW_2(fs, X, val)            \
214   do                                            \
215     {                                           \
216       union _FP_UNION_##fs _flo;                \
217       _flo.flt = (val);                         \
218                                                 \
219       X##_f0 = _flo.bits.frac0;                 \
220       X##_f1 = _flo.bits.frac1;                 \
221       X##_e  = _flo.bits.exp;                   \
222       X##_s  = _flo.bits.sign;                  \
223     }                                           \
224   while (0)
225
226 #define _FP_UNPACK_RAW_2_P(fs, X, val)                                  \
227   do                                                                    \
228     {                                                                   \
229       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);      \
230                                                                         \
231       X##_f0 = _flo->bits.frac0;                                        \
232       X##_f1 = _flo->bits.frac1;                                        \
233       X##_e  = _flo->bits.exp;                                          \
234       X##_s  = _flo->bits.sign;                                         \
235     }                                                                   \
236   while (0)
237
238
239 /*
240  * Repack the raw bits of a native fp value.
241  */
242
243 #define _FP_PACK_RAW_2(fs, val, X)              \
244   do                                            \
245     {                                           \
246       union _FP_UNION_##fs _flo;                \
247                                                 \
248       _flo.bits.frac0 = X##_f0;                 \
249       _flo.bits.frac1 = X##_f1;                 \
250       _flo.bits.exp   = X##_e;                  \
251       _flo.bits.sign  = X##_s;                  \
252                                                 \
253       (val) = _flo.flt;                         \
254     }                                           \
255   while (0)
256
257 #define _FP_PACK_RAW_2_P(fs, val, X)                                    \
258   do                                                                    \
259     {                                                                   \
260       union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val);      \
261                                                                         \
262       _flo->bits.frac0 = X##_f0;                                        \
263       _flo->bits.frac1 = X##_f1;                                        \
264       _flo->bits.exp   = X##_e;                                         \
265       _flo->bits.sign  = X##_s;                                         \
266     }                                                                   \
267   while (0)
268
269
270 /*
271  * Multiplication algorithms:
272  */
273
274 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
275
276 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
277   do                                                                    \
278     {                                                                   \
279       _FP_FRAC_DECL_2 (_b);                                             \
280       _FP_FRAC_DECL_2 (_c);                                             \
281                                                                         \
282       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \
283       doit (_b_f1, _b_f0, X##_f0, Y##_f1);                              \
284       doit (_c_f1, _c_f0, X##_f1, Y##_f0);                              \
285       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \
286                                                                         \
287       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
288                        _FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0,         \
289                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
290                        _FP_FRAC_WORD_4 (R, 1));                         \
291       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
292                        _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0,         \
293                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
294                        _FP_FRAC_WORD_4 (R, 1));                         \
295     }                                                                   \
296   while (0)
297
298 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
299   do                                                                    \
300     {                                                                   \
301       _FP_FRAC_DECL_4 (_z);                                             \
302                                                                         \
303       _FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit);               \
304                                                                         \
305       /* Normalize since we know where the msb of the multiplicands     \
306          were (bit B), we know that the msb of the of the product is    \
307          at either 2B or 2B-1.  */                                      \
308       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);                    \
309       R##_f0 = _FP_FRAC_WORD_4 (_z, 0);                                 \
310       R##_f1 = _FP_FRAC_WORD_4 (_z, 1);                                 \
311     }                                                                   \
312   while (0)
313
314 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
315    Do only 3 multiplications instead of four. This one is for machines
316    where multiplication is much more expensive than subtraction.  */
317
318 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
319   do                                                                    \
320     {                                                                   \
321       _FP_FRAC_DECL_2 (_b);                                             \
322       _FP_FRAC_DECL_2 (_c);                                             \
323       _FP_W_TYPE _d;                                                    \
324       int _c1, _c2;                                                     \
325                                                                         \
326       _b_f0 = X##_f0 + X##_f1;                                          \
327       _c1 = _b_f0 < X##_f0;                                             \
328       _b_f1 = Y##_f0 + Y##_f1;                                          \
329       _c2 = _b_f1 < Y##_f0;                                             \
330       doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0);                \
331       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \
332       doit (_c_f1, _c_f0, X##_f1, Y##_f1);                              \
333                                                                         \
334       _b_f0 &= -_c2;                                                    \
335       _b_f1 &= -_c1;                                                    \
336       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
337                        _FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d,      \
338                        0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
339       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
340                         _b_f0);                                         \
341       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
342                         _b_f1);                                         \
343       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
344                        _FP_FRAC_WORD_4 (R, 1),                          \
345                        0, _d, _FP_FRAC_WORD_4 (R, 0));                  \
346       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
347                        _FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0);        \
348       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
349                        _c_f1, _c_f0,                                    \
350                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
351     }                                                                   \
352   while (0)
353
354 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
355   do                                                                    \
356     {                                                                   \
357       _FP_FRAC_DECL_4 (_z);                                             \
358                                                                         \
359       _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit);          \
360                                                                         \
361       /* Normalize since we know where the msb of the multiplicands     \
362          were (bit B), we know that the msb of the of the product is    \
363          at either 2B or 2B-1.  */                                      \
364       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);                    \
365       R##_f0 = _FP_FRAC_WORD_4 (_z, 0);                                 \
366       R##_f1 = _FP_FRAC_WORD_4 (_z, 1);                                 \
367     }                                                                   \
368   while (0)
369
370 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)       \
371   do                                                    \
372     {                                                   \
373       _FP_W_TYPE _x[2], _y[2];                          \
374       _x[0] = X##_f0;                                   \
375       _x[1] = X##_f1;                                   \
376       _y[0] = Y##_f0;                                   \
377       _y[1] = Y##_f1;                                   \
378                                                         \
379       mpn_mul_n (R##_f, _x, _y, 2);                     \
380     }                                                   \
381   while (0)
382
383 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
384   do                                                                    \
385     {                                                                   \
386       _FP_FRAC_DECL_4 (_z);                                             \
387                                                                         \
388       _FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y);                      \
389                                                                         \
390       /* Normalize since we know where the msb of the multiplicands     \
391          were (bit B), we know that the msb of the of the product is    \
392          at either 2B or 2B-1.  */                                      \
393       _FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits);                    \
394       R##_f0 = _z_f[0];                                                 \
395       R##_f1 = _z_f[1];                                                 \
396     }                                                                   \
397   while (0)
398
399 /* Do at most 120x120=240 bits multiplication using double floating
400    point multiplication.  This is useful if floating point
401    multiplication has much bigger throughput than integer multiply.
402    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
403    between 106 and 120 only.
404    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
405    SETFETZ is a macro which will disable all FPU exceptions and set rounding
406    towards zero,  RESETFE should optionally reset it back.  */
407
408 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
409   do                                                                    \
410     {                                                                   \
411       static const double _const[] =                                    \
412         {                                                               \
413           /* 2^-24 */ 5.9604644775390625e-08,                           \
414           /* 2^-48 */ 3.5527136788005009e-15,                           \
415           /* 2^-72 */ 2.1175823681357508e-22,                           \
416           /* 2^-96 */ 1.2621774483536189e-29,                           \
417           /* 2^28 */ 2.68435456e+08,                                    \
418           /* 2^4 */ 1.600000e+01,                                       \
419           /* 2^-20 */ 9.5367431640625e-07,                              \
420           /* 2^-44 */ 5.6843418860808015e-14,                           \
421           /* 2^-68 */ 3.3881317890172014e-21,                           \
422           /* 2^-92 */ 2.0194839173657902e-28,                           \
423           /* 2^-116 */ 1.2037062152420224e-35                           \
424         };                                                              \
425       double _a240, _b240, _c240, _d240, _e240, _f240,                  \
426         _g240, _h240, _i240, _j240, _k240;                              \
427       union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
428                                        _p240, _q240, _r240, _s240;      \
429       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
430                                                                         \
431       if (wfracbits < 106 || wfracbits > 120)                           \
432         abort ();                                                       \
433                                                                         \
434       setfetz;                                                          \
435                                                                         \
436       _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
437       _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
438       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
439       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
440       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
441       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
442       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
443       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
444       _a240 = (double) (long) (X##_f1 >> 32);                           \
445       _f240 = (double) (long) (Y##_f1 >> 32);                           \
446       _e240 *= _const[3];                                               \
447       _j240 *= _const[3];                                               \
448       _d240 *= _const[2];                                               \
449       _i240 *= _const[2];                                               \
450       _c240 *= _const[1];                                               \
451       _h240 *= _const[1];                                               \
452       _b240 *= _const[0];                                               \
453       _g240 *= _const[0];                                               \
454       _s240.d =                                                       _e240*_j240; \
455       _r240.d =                                         _d240*_j240 + _e240*_i240; \
456       _q240.d =                           _c240*_j240 + _d240*_i240 + _e240*_h240; \
457       _p240.d =             _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
458       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
459       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
460       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
461       _l240.d = _a240*_g240 + _b240*_f240;                              \
462       _k240 =   _a240*_f240;                                            \
463       _r240.d += _s240.d;                                               \
464       _q240.d += _r240.d;                                               \
465       _p240.d += _q240.d;                                               \
466       _o240.d += _p240.d;                                               \
467       _n240.d += _o240.d;                                               \
468       _m240.d += _n240.d;                                               \
469       _l240.d += _m240.d;                                               \
470       _k240 += _l240.d;                                                 \
471       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
472       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
473       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
474       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
475       _o240.d += _const[7];                                             \
476       _n240.d += _const[6];                                             \
477       _m240.d += _const[5];                                             \
478       _l240.d += _const[4];                                             \
479       if (_s240.d != 0.0)                                               \
480         _y240 = 1;                                                      \
481       if (_r240.d != 0.0)                                               \
482         _y240 = 1;                                                      \
483       if (_q240.d != 0.0)                                               \
484         _y240 = 1;                                                      \
485       if (_p240.d != 0.0)                                               \
486         _y240 = 1;                                                      \
487       _t240 = (DItype) _k240;                                           \
488       _u240 = _l240.i;                                                  \
489       _v240 = _m240.i;                                                  \
490       _w240 = _n240.i;                                                  \
491       _x240 = _o240.i;                                                  \
492       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                      \
493                 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));     \
494       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))         \
495                 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))       \
496                 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))       \
497                 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))        \
498                 | _y240);                                               \
499       resetfe;                                                          \
500     }                                                                   \
501   while (0)
502
503 /*
504  * Division algorithms:
505  */
506
507 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
508   do                                                                    \
509     {                                                                   \
510       _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;       \
511       if (_FP_FRAC_GE_2 (X, Y))                                         \
512         {                                                               \
513           _n_f2 = X##_f1 >> 1;                                          \
514           _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;        \
515           _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                      \
516         }                                                               \
517       else                                                              \
518         {                                                               \
519           R##_e--;                                                      \
520           _n_f2 = X##_f1;                                               \
521           _n_f1 = X##_f0;                                               \
522           _n_f0 = 0;                                                    \
523         }                                                               \
524                                                                         \
525       /* Normalize, i.e. make the most significant bit of the           \
526          denominator set. */                                            \
527       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                          \
528                                                                         \
529       udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);                 \
530       umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0);                         \
531       _r_f0 = _n_f0;                                                    \
532       if (_FP_FRAC_GT_2 (_m, _r))                                       \
533         {                                                               \
534           R##_f1--;                                                     \
535           _FP_FRAC_ADD_2 (_r, Y, _r);                                   \
536           if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r))          \
537             {                                                           \
538               R##_f1--;                                                 \
539               _FP_FRAC_ADD_2 (_r, Y, _r);                               \
540             }                                                           \
541         }                                                               \
542       _FP_FRAC_DEC_2 (_r, _m);                                          \
543                                                                         \
544       if (_r_f1 == Y##_f1)                                              \
545         {                                                               \
546           /* This is a special case, not an optimization                \
547              (_r/Y##_f1 would not fit into UWtype).                     \
548              As _r is guaranteed to be < Y,  R##_f0 can be either       \
549              (UWtype)-1 or (UWtype)-2.  But as we know what kind        \
550              of bits it is (sticky, guard, round),  we don't care.      \
551              We also don't care what the reminder is,  because the      \
552              guard bit will be set anyway.  -jj */                      \
553           R##_f0 = -1;                                                  \
554         }                                                               \
555       else                                                              \
556         {                                                               \
557           udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);             \
558           umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0);                     \
559           _r_f0 = 0;                                                    \
560           if (_FP_FRAC_GT_2 (_m, _r))                                   \
561             {                                                           \
562               R##_f0--;                                                 \
563               _FP_FRAC_ADD_2 (_r, Y, _r);                               \
564               if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r))      \
565                 {                                                       \
566                   R##_f0--;                                             \
567                   _FP_FRAC_ADD_2 (_r, Y, _r);                           \
568                 }                                                       \
569             }                                                           \
570           if (!_FP_FRAC_EQ_2 (_r, _m))                                  \
571             R##_f0 |= _FP_WORK_STICKY;                                  \
572         }                                                               \
573     }                                                                   \
574   while (0)
575
576
577 /*
578  * Square root algorithms:
579  * We have just one right now, maybe Newton approximation
580  * should be added for those machines where division is fast.
581  */
582
583 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                          \
584   do                                                            \
585     {                                                           \
586       while (q)                                                 \
587         {                                                       \
588           T##_f1 = S##_f1 + q;                                  \
589           if (T##_f1 <= X##_f1)                                 \
590             {                                                   \
591               S##_f1 = T##_f1 + q;                              \
592               X##_f1 -= T##_f1;                                 \
593               R##_f1 += q;                                      \
594             }                                                   \
595           _FP_FRAC_SLL_2 (X, 1);                                \
596           q >>= 1;                                              \
597         }                                                       \
598       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);              \
599       while (q != _FP_WORK_ROUND)                               \
600         {                                                       \
601           T##_f0 = S##_f0 + q;                                  \
602           T##_f1 = S##_f1;                                      \
603           if (T##_f1 < X##_f1                                   \
604               || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))        \
605             {                                                   \
606               S##_f0 = T##_f0 + q;                              \
607               S##_f1 += (T##_f0 > S##_f0);                      \
608               _FP_FRAC_DEC_2 (X, T);                            \
609               R##_f0 += q;                                      \
610             }                                                   \
611           _FP_FRAC_SLL_2 (X, 1);                                \
612           q >>= 1;                                              \
613         }                                                       \
614       if (X##_f0 | X##_f1)                                      \
615         {                                                       \
616           if (S##_f1 < X##_f1                                   \
617               || (S##_f1 == X##_f1 && S##_f0 < X##_f0))         \
618             R##_f0 |= _FP_WORK_ROUND;                           \
619           R##_f0 |= _FP_WORK_STICKY;                            \
620         }                                                       \
621     }                                                           \
622   while (0)
623
624
625 /*
626  * Assembly/disassembly for converting to/from integral types.
627  * No shifting or overflow handled here.
628  */
629
630 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
631   (void) ((rsize <= _FP_W_TYPE_SIZE)            \
632           ? ({ r = X##_f0; })                   \
633           : ({                                  \
634               r = X##_f1;                       \
635               r <<= _FP_W_TYPE_SIZE;            \
636               r += X##_f0;                      \
637             }))
638
639 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
640   do                                                                    \
641     {                                                                   \
642       X##_f0 = r;                                                       \
643       X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
644     }                                                                   \
645   while (0)
646
647 /*
648  * Convert FP values between word sizes
649  */
650
651 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
652
653 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
654
655 #define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)