Rename soft-fp op-[1248].h variables to avoid risk of shadowing.
[platform/upstream/glibc.git] / soft-fp / op-2.h
1 /* Software floating-point emulation.
2    Basic two-word fraction declaration and manipulation.
3    Copyright (C) 1997-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6                   Jakub Jelinek (jj@ultra.linux.cz),
7                   David S. Miller (davem@redhat.com) and
8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32
33 #define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1
34 #define _FP_FRAC_COPY_2(D, S)   (D##_f0 = S##_f0, D##_f1 = S##_f1)
35 #define _FP_FRAC_SET_2(X, I)    __FP_FRAC_SET_2 (X, I)
36 #define _FP_FRAC_HIGH_2(X)      (X##_f1)
37 #define _FP_FRAC_LOW_2(X)       (X##_f0)
38 #define _FP_FRAC_WORD_2(X, w)   (X##_f##w)
39
40 #define _FP_FRAC_SLL_2(X, N)                                            \
41   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
42           ? ({                                                          \
43               if (__builtin_constant_p (N) && (N) == 1)                 \
44                 {                                                       \
45                   X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
46                   X##_f0 += X##_f0;                                     \
47                 }                                                       \
48               else                                                      \
49                 {                                                       \
50                   X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51                   X##_f0 <<= (N);                                       \
52                 }                                                       \
53               0;                                                        \
54             })                                                          \
55           : ({                                                          \
56               X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);               \
57               X##_f0 = 0;                                               \
58             }))
59
60
61 #define _FP_FRAC_SRL_2(X, N)                                            \
62   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
63           ? ({                                                          \
64               X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65               X##_f1 >>= (N);                                           \
66             })                                                          \
67           : ({                                                          \
68               X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);               \
69               X##_f1 = 0;                                               \
70             }))
71
72 /* Right shift with sticky-lsb.  */
73 #define _FP_FRAC_SRST_2(X, S, N, sz)                                    \
74   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
75           ? ({                                                          \
76               S = (__builtin_constant_p (N) && (N) == 1                 \
77                    ? X##_f0 & 1                                         \
78                    : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);         \
79               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80               X##_f1 >>= (N);                                           \
81             })                                                          \
82           : ({                                                          \
83               S = ((((N) == _FP_W_TYPE_SIZE                             \
84                      ? 0                                                \
85                      : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))           \
86                     | X##_f0) != 0);                                    \
87               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));             \
88               X##_f1 = 0;                                               \
89             }))
90
91 #define _FP_FRAC_SRS_2(X, N, sz)                                        \
92   (void) (((N) < _FP_W_TYPE_SIZE)                                       \
93           ? ({                                                          \
94               X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
95                         | (__builtin_constant_p (N) && (N) == 1         \
96                            ? X##_f0 & 1                                 \
97                            : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98               X##_f1 >>= (N);                                           \
99             })                                                          \
100           : ({                                                          \
101               X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)               \
102                         | ((((N) == _FP_W_TYPE_SIZE                     \
103                              ? 0                                        \
104                              : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))   \
105                             | X##_f0) != 0));                           \
106               X##_f1 = 0;                                               \
107             }))
108
109 #define _FP_FRAC_ADDI_2(X, I)   \
110   __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
111
112 #define _FP_FRAC_ADD_2(R, X, Y) \
113   __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114
115 #define _FP_FRAC_SUB_2(R, X, Y) \
116   __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117
118 #define _FP_FRAC_DEC_2(X, Y)    \
119   __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
120
121 #define _FP_FRAC_CLZ_2(R, X)                    \
122   do                                            \
123     {                                           \
124       if (X##_f1)                               \
125         __FP_CLZ (R, X##_f1);                   \
126       else                                      \
127         {                                       \
128           __FP_CLZ (R, X##_f0);                 \
129           R += _FP_W_TYPE_SIZE;                 \
130         }                                       \
131     }                                           \
132   while (0)
133
134 /* Predicates */
135 #define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE) X##_f1 < 0)
136 #define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
137 #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
138 #define _FP_FRAC_CLEAR_OVERP_2(fs, X)   (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
139 #define _FP_FRAC_HIGHBIT_DW_2(fs, X)    \
140   (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
141 #define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
142 #define _FP_FRAC_GT_2(X, Y)     \
143   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
144 #define _FP_FRAC_GE_2(X, Y)     \
145   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
146
147 #define _FP_ZEROFRAC_2          0, 0
148 #define _FP_MINFRAC_2           0, 1
149 #define _FP_MAXFRAC_2           (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
150
151 /*
152  * Internals
153  */
154
155 #define __FP_FRAC_SET_2(X, I1, I0)      (X##_f0 = I0, X##_f1 = I1)
156
157 #define __FP_CLZ_2(R, xh, xl)                   \
158   do                                            \
159     {                                           \
160       if (xh)                                   \
161         __FP_CLZ (R, xh);                       \
162       else                                      \
163         {                                       \
164           __FP_CLZ (R, xl);                     \
165           R += _FP_W_TYPE_SIZE;                 \
166         }                                       \
167     }                                           \
168   while (0)
169
170 #if 0
171
172 # ifndef __FP_FRAC_ADDI_2
173 #  define __FP_FRAC_ADDI_2(xh, xl, i)   \
174   (xh += ((xl += i) < i))
175 # endif
176 # ifndef __FP_FRAC_ADD_2
177 #  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)       \
178   (rh = xh + yh + ((rl = xl + yl) < xl))
179 # endif
180 # ifndef __FP_FRAC_SUB_2
181 #  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)       \
182   (rh = xh - yh - ((rl = xl - yl) > xl))
183 # endif
184 # ifndef __FP_FRAC_DEC_2
185 #  define __FP_FRAC_DEC_2(xh, xl, yh, yl)               \
186   do                                                    \
187     {                                                   \
188       UWtype __FP_FRAC_DEC_2_t = xl;                    \
189       xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);      \
190     }                                                   \
191   while (0)
192 # endif
193
194 #else
195
196 # undef __FP_FRAC_ADDI_2
197 # define __FP_FRAC_ADDI_2(xh, xl, i)    add_ssaaaa (xh, xl, xh, xl, 0, i)
198 # undef __FP_FRAC_ADD_2
199 # define __FP_FRAC_ADD_2                add_ssaaaa
200 # undef __FP_FRAC_SUB_2
201 # define __FP_FRAC_SUB_2                sub_ddmmss
202 # undef __FP_FRAC_DEC_2
203 # define __FP_FRAC_DEC_2(xh, xl, yh, yl)        \
204   sub_ddmmss (xh, xl, xh, xl, yh, yl)
205
206 #endif
207
208 /*
209  * Unpack the raw bits of a native fp value.  Do not classify or
210  * normalize the data.
211  */
212
213 #define _FP_UNPACK_RAW_2(fs, X, val)                    \
214   do                                                    \
215     {                                                   \
216       union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;        \
217       _FP_UNPACK_RAW_2_flo.flt = (val);                 \
218                                                         \
219       X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;         \
220       X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;         \
221       X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;           \
222       X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;          \
223     }                                                   \
224   while (0)
225
226 #define _FP_UNPACK_RAW_2_P(fs, X, val)                  \
227   do                                                    \
228     {                                                   \
229       union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo      \
230         = (union _FP_UNION_##fs *) (val);               \
231                                                         \
232       X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;      \
233       X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;      \
234       X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;        \
235       X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;       \
236     }                                                   \
237   while (0)
238
239
240 /*
241  * Repack the raw bits of a native fp value.
242  */
243
244 #define _FP_PACK_RAW_2(fs, val, X)              \
245   do                                            \
246     {                                           \
247       union _FP_UNION_##fs _FP_PACK_RAW_2_flo;  \
248                                                 \
249       _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;   \
250       _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;   \
251       _FP_PACK_RAW_2_flo.bits.exp   = X##_e;    \
252       _FP_PACK_RAW_2_flo.bits.sign  = X##_s;    \
253                                                 \
254       (val) = _FP_PACK_RAW_2_flo.flt;           \
255     }                                           \
256   while (0)
257
258 #define _FP_PACK_RAW_2_P(fs, val, X)                    \
259   do                                                    \
260     {                                                   \
261       union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo        \
262         = (union _FP_UNION_##fs *) (val);               \
263                                                         \
264       _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;        \
265       _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;        \
266       _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;         \
267       _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;         \
268     }                                                   \
269   while (0)
270
271
272 /*
273  * Multiplication algorithms:
274  */
275
276 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
277
278 #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)                \
279   do                                                                    \
280     {                                                                   \
281       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);                       \
282       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);                       \
283                                                                         \
284       doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),             \
285             X##_f0, Y##_f0);                                            \
286       doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,   \
287             X##_f0, Y##_f1);                                            \
288       doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,   \
289             X##_f1, Y##_f0);                                            \
290       doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),             \
291             X##_f1, Y##_f1);                                            \
292                                                                         \
293       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
294                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
295                        _FP_MUL_MEAT_DW_2_wide_b_f1,                     \
296                        _FP_MUL_MEAT_DW_2_wide_b_f0,                     \
297                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
298                        _FP_FRAC_WORD_4 (R, 1));                         \
299       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
300                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
301                        _FP_MUL_MEAT_DW_2_wide_c_f1,                     \
302                        _FP_MUL_MEAT_DW_2_wide_c_f0,                     \
303                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
304                        _FP_FRAC_WORD_4 (R, 1));                         \
305     }                                                                   \
306   while (0)
307
308 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)                   \
309   do                                                                    \
310     {                                                                   \
311       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);                          \
312                                                                         \
313       _FP_MUL_MEAT_DW_2_wide (wfracbits, _FP_MUL_MEAT_2_wide_z,         \
314                               X, Y, doit);                              \
315                                                                         \
316       /* Normalize since we know where the msb of the multiplicands     \
317          were (bit B), we know that the msb of the of the product is    \
318          at either 2B or 2B-1.  */                                      \
319       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, wfracbits-1, 2*wfracbits); \
320       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);              \
321       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);              \
322     }                                                                   \
323   while (0)
324
325 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
326    Do only 3 multiplications instead of four. This one is for machines
327    where multiplication is much more expensive than subtraction.  */
328
329 #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)           \
330   do                                                                    \
331     {                                                                   \
332       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);                  \
333       _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);                  \
334       _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;                         \
335       int _FP_MUL_MEAT_DW_2_wide_3mul_c1;                               \
336       int _FP_MUL_MEAT_DW_2_wide_3mul_c2;                               \
337                                                                         \
338       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;               \
339       _FP_MUL_MEAT_DW_2_wide_3mul_c1                                    \
340         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;                    \
341       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;               \
342       _FP_MUL_MEAT_DW_2_wide_3mul_c2                                    \
343         = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;                    \
344       doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),      \
345             X##_f0, Y##_f0);                                            \
346       doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),             \
347             _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,                           \
348             _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);                          \
349       doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                           \
350             _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);          \
351                                                                         \
352       _FP_MUL_MEAT_DW_2_wide_3mul_b_f0                                  \
353         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;                             \
354       _FP_MUL_MEAT_DW_2_wide_3mul_b_f1                                  \
355         &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;                             \
356       __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
357                        _FP_FRAC_WORD_4 (R, 1),                          \
358                        (_FP_MUL_MEAT_DW_2_wide_3mul_c1                  \
359                         & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,           \
360                        _FP_MUL_MEAT_DW_2_wide_3mul_d,                   \
361                        0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
362       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
363                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f0);              \
364       __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
365                         _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);              \
366       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
367                        _FP_FRAC_WORD_4 (R, 1),                          \
368                        0, _FP_MUL_MEAT_DW_2_wide_3mul_d,                \
369                        _FP_FRAC_WORD_4 (R, 0));                         \
370       __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
371                        _FP_FRAC_WORD_4 (R, 1), 0,                       \
372                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
373                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);               \
374       __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),  \
375                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,                \
376                        _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,                \
377                        _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
378     }                                                                   \
379   while (0)
380
381 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)              \
382   do                                                                    \
383     {                                                                   \
384       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);                     \
385                                                                         \
386       _FP_MUL_MEAT_DW_2_wide_3mul (wfracbits,                           \
387                                    _FP_MUL_MEAT_2_wide_3mul_z,          \
388                                    X, Y, doit);                         \
389                                                                         \
390       /* Normalize since we know where the msb of the multiplicands     \
391          were (bit B), we know that the msb of the of the product is    \
392          at either 2B or 2B-1.  */                                      \
393       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,                       \
394                       wfracbits-1, 2*wfracbits);                        \
395       R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);         \
396       R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);         \
397     }                                                                   \
398   while (0)
399
400 #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)       \
401   do                                                    \
402     {                                                   \
403       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];            \
404       _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];            \
405       _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;              \
406       _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;              \
407       _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;              \
408       _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;              \
409                                                         \
410       mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,        \
411                  _FP_MUL_MEAT_DW_2_gmp_y, 2);           \
412     }                                                   \
413   while (0)
414
415 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)                          \
416   do                                                                    \
417     {                                                                   \
418       _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);                           \
419                                                                         \
420       _FP_MUL_MEAT_DW_2_gmp (wfracbits, _FP_MUL_MEAT_2_gmp_z, X, Y);    \
421                                                                         \
422       /* Normalize since we know where the msb of the multiplicands     \
423          were (bit B), we know that the msb of the of the product is    \
424          at either 2B or 2B-1.  */                                      \
425       _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, wfracbits-1, 2*wfracbits);  \
426       R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];                               \
427       R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];                               \
428     }                                                                   \
429   while (0)
430
431 /* Do at most 120x120=240 bits multiplication using double floating
432    point multiplication.  This is useful if floating point
433    multiplication has much bigger throughput than integer multiply.
434    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
435    between 106 and 120 only.
436    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
437    SETFETZ is a macro which will disable all FPU exceptions and set rounding
438    towards zero,  RESETFE should optionally reset it back.  */
439
440 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
441   do                                                                    \
442     {                                                                   \
443       static const double _const[] =                                    \
444         {                                                               \
445           /* 2^-24 */ 5.9604644775390625e-08,                           \
446           /* 2^-48 */ 3.5527136788005009e-15,                           \
447           /* 2^-72 */ 2.1175823681357508e-22,                           \
448           /* 2^-96 */ 1.2621774483536189e-29,                           \
449           /* 2^28 */ 2.68435456e+08,                                    \
450           /* 2^4 */ 1.600000e+01,                                       \
451           /* 2^-20 */ 9.5367431640625e-07,                              \
452           /* 2^-44 */ 5.6843418860808015e-14,                           \
453           /* 2^-68 */ 3.3881317890172014e-21,                           \
454           /* 2^-92 */ 2.0194839173657902e-28,                           \
455           /* 2^-116 */ 1.2037062152420224e-35                           \
456         };                                                              \
457       double _a240, _b240, _c240, _d240, _e240, _f240,                  \
458         _g240, _h240, _i240, _j240, _k240;                              \
459       union { double d; UDItype i; } _l240, _m240, _n240, _o240,        \
460                                        _p240, _q240, _r240, _s240;      \
461       UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;             \
462                                                                         \
463       if (wfracbits < 106 || wfracbits > 120)                           \
464         abort ();                                                       \
465                                                                         \
466       setfetz;                                                          \
467                                                                         \
468       _e240 = (double) (long) (X##_f0 & 0xffffff);                      \
469       _j240 = (double) (long) (Y##_f0 & 0xffffff);                      \
470       _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);              \
471       _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);              \
472       _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
473       _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
474       _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);               \
475       _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);               \
476       _a240 = (double) (long) (X##_f1 >> 32);                           \
477       _f240 = (double) (long) (Y##_f1 >> 32);                           \
478       _e240 *= _const[3];                                               \
479       _j240 *= _const[3];                                               \
480       _d240 *= _const[2];                                               \
481       _i240 *= _const[2];                                               \
482       _c240 *= _const[1];                                               \
483       _h240 *= _const[1];                                               \
484       _b240 *= _const[0];                                               \
485       _g240 *= _const[0];                                               \
486       _s240.d =                                                       _e240*_j240; \
487       _r240.d =                                         _d240*_j240 + _e240*_i240; \
488       _q240.d =                           _c240*_j240 + _d240*_i240 + _e240*_h240; \
489       _p240.d =             _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
490       _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
491       _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;  \
492       _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;                \
493       _l240.d = _a240*_g240 + _b240*_f240;                              \
494       _k240 =   _a240*_f240;                                            \
495       _r240.d += _s240.d;                                               \
496       _q240.d += _r240.d;                                               \
497       _p240.d += _q240.d;                                               \
498       _o240.d += _p240.d;                                               \
499       _n240.d += _o240.d;                                               \
500       _m240.d += _n240.d;                                               \
501       _l240.d += _m240.d;                                               \
502       _k240 += _l240.d;                                                 \
503       _s240.d -= ((_const[10]+_s240.d)-_const[10]);                     \
504       _r240.d -= ((_const[9]+_r240.d)-_const[9]);                       \
505       _q240.d -= ((_const[8]+_q240.d)-_const[8]);                       \
506       _p240.d -= ((_const[7]+_p240.d)-_const[7]);                       \
507       _o240.d += _const[7];                                             \
508       _n240.d += _const[6];                                             \
509       _m240.d += _const[5];                                             \
510       _l240.d += _const[4];                                             \
511       if (_s240.d != 0.0)                                               \
512         _y240 = 1;                                                      \
513       if (_r240.d != 0.0)                                               \
514         _y240 = 1;                                                      \
515       if (_q240.d != 0.0)                                               \
516         _y240 = 1;                                                      \
517       if (_p240.d != 0.0)                                               \
518         _y240 = 1;                                                      \
519       _t240 = (DItype) _k240;                                           \
520       _u240 = _l240.i;                                                  \
521       _v240 = _m240.i;                                                  \
522       _w240 = _n240.i;                                                  \
523       _x240 = _o240.i;                                                  \
524       R##_f1 = ((_t240 << (128 - (wfracbits - 1)))                      \
525                 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));     \
526       R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))         \
527                 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))       \
528                 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))       \
529                 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))        \
530                 | _y240);                                               \
531       resetfe;                                                          \
532     }                                                                   \
533   while (0)
534
535 /*
536  * Division algorithms:
537  */
538
539 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                                \
540   do                                                                    \
541     {                                                                   \
542       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;                              \
543       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;                              \
544       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;                              \
545       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;                              \
546       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;                              \
547       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;                              \
548       _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;                              \
549       if (_FP_FRAC_GE_2 (X, Y))                                         \
550         {                                                               \
551           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;                       \
552           _FP_DIV_MEAT_2_udiv_n_f1                                      \
553             = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;            \
554           _FP_DIV_MEAT_2_udiv_n_f0                                      \
555             = X##_f0 << (_FP_W_TYPE_SIZE - 1);                          \
556         }                                                               \
557       else                                                              \
558         {                                                               \
559           R##_e--;                                                      \
560           _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;                            \
561           _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;                            \
562           _FP_DIV_MEAT_2_udiv_n_f0 = 0;                                 \
563         }                                                               \
564                                                                         \
565       /* Normalize, i.e. make the most significant bit of the           \
566          denominator set. */                                            \
567       _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);                          \
568                                                                         \
569       udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,                     \
570                   _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,   \
571                   Y##_f1);                                              \
572       umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,    \
573                  R##_f1, Y##_f0);                                       \
574       _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;              \
575       if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
576         {                                                               \
577           R##_f1--;                                                     \
578           _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                     \
579                           _FP_DIV_MEAT_2_udiv_r);                       \
580           if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)                  \
581               && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                  \
582                                 _FP_DIV_MEAT_2_udiv_r))                 \
583             {                                                           \
584               R##_f1--;                                                 \
585               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
586                               _FP_DIV_MEAT_2_udiv_r);                   \
587             }                                                           \
588         }                                                               \
589       _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);    \
590                                                                         \
591       if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)                           \
592         {                                                               \
593           /* This is a special case, not an optimization                \
594              (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).  \
595              As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,          \
596              R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we  \
597              know what kind of bits it is (sticky, guard, round),       \
598              we don't care.  We also don't care what the reminder is,   \
599              because the guard bit will be set anyway.  -jj */          \
600           R##_f0 = -1;                                                  \
601         }                                                               \
602       else                                                              \
603         {                                                               \
604           udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,                 \
605                       _FP_DIV_MEAT_2_udiv_r_f1,                         \
606                       _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);                \
607           umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,                          \
608                      _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);         \
609           _FP_DIV_MEAT_2_udiv_r_f0 = 0;                                 \
610           if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,                     \
611                              _FP_DIV_MEAT_2_udiv_r))                    \
612             {                                                           \
613               R##_f0--;                                                 \
614               _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,                 \
615                               _FP_DIV_MEAT_2_udiv_r);                   \
616               if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)              \
617                   && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,              \
618                                     _FP_DIV_MEAT_2_udiv_r))             \
619                 {                                                       \
620                   R##_f0--;                                             \
621                   _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,             \
622                                   _FP_DIV_MEAT_2_udiv_r);               \
623                 }                                                       \
624             }                                                           \
625           if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,                    \
626                               _FP_DIV_MEAT_2_udiv_m))                   \
627             R##_f0 |= _FP_WORK_STICKY;                                  \
628         }                                                               \
629     }                                                                   \
630   while (0)
631
632
633 /*
634  * Square root algorithms:
635  * We have just one right now, maybe Newton approximation
636  * should be added for those machines where division is fast.
637  */
638
639 #define _FP_SQRT_MEAT_2(R, S, T, X, q)                          \
640   do                                                            \
641     {                                                           \
642       while (q)                                                 \
643         {                                                       \
644           T##_f1 = S##_f1 + q;                                  \
645           if (T##_f1 <= X##_f1)                                 \
646             {                                                   \
647               S##_f1 = T##_f1 + q;                              \
648               X##_f1 -= T##_f1;                                 \
649               R##_f1 += q;                                      \
650             }                                                   \
651           _FP_FRAC_SLL_2 (X, 1);                                \
652           q >>= 1;                                              \
653         }                                                       \
654       q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);              \
655       while (q != _FP_WORK_ROUND)                               \
656         {                                                       \
657           T##_f0 = S##_f0 + q;                                  \
658           T##_f1 = S##_f1;                                      \
659           if (T##_f1 < X##_f1                                   \
660               || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))        \
661             {                                                   \
662               S##_f0 = T##_f0 + q;                              \
663               S##_f1 += (T##_f0 > S##_f0);                      \
664               _FP_FRAC_DEC_2 (X, T);                            \
665               R##_f0 += q;                                      \
666             }                                                   \
667           _FP_FRAC_SLL_2 (X, 1);                                \
668           q >>= 1;                                              \
669         }                                                       \
670       if (X##_f0 | X##_f1)                                      \
671         {                                                       \
672           if (S##_f1 < X##_f1                                   \
673               || (S##_f1 == X##_f1 && S##_f0 < X##_f0))         \
674             R##_f0 |= _FP_WORK_ROUND;                           \
675           R##_f0 |= _FP_WORK_STICKY;                            \
676         }                                                       \
677     }                                                           \
678   while (0)
679
680
681 /*
682  * Assembly/disassembly for converting to/from integral types.
683  * No shifting or overflow handled here.
684  */
685
686 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
687   (void) ((rsize <= _FP_W_TYPE_SIZE)            \
688           ? ({ r = X##_f0; })                   \
689           : ({                                  \
690               r = X##_f1;                       \
691               r <<= _FP_W_TYPE_SIZE;            \
692               r += X##_f0;                      \
693             }))
694
695 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
696   do                                                                    \
697     {                                                                   \
698       X##_f0 = r;                                                       \
699       X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
700     }                                                                   \
701   while (0)
702
703 /*
704  * Convert FP values between word sizes
705  */
706
707 #define _FP_FRAC_COPY_1_2(D, S)         (D##_f = S##_f0)
708
709 #define _FP_FRAC_COPY_2_1(D, S)         ((D##_f0 = S##_f), (D##_f1 = 0))
710
711 #define _FP_FRAC_COPY_2_2(D, S)         _FP_FRAC_COPY_2 (D, S)