Replace FSF snail mail address with URLs.
[platform/upstream/glibc.git] / soft-fp / op-4.h
1 /* Software floating-point emulation.
2    Basic four-word fraction declaration and manipulation.
3    Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6                   Jakub Jelinek (jj@ultra.linux.cz),
7                   David S. Miller (davem@redhat.com) and
8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32
33 #define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
34 #define _FP_FRAC_COPY_4(D,S)                    \
35   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
36    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
37 #define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
38 #define _FP_FRAC_HIGH_4(X)      (X##_f[3])
39 #define _FP_FRAC_LOW_4(X)       (X##_f[0])
40 #define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
41
42 #define _FP_FRAC_SLL_4(X,N)                                             \
43   do {                                                                  \
44     _FP_I_TYPE _up, _down, _skip, _i;                                   \
45     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
46     _up = (N) % _FP_W_TYPE_SIZE;                                        \
47     _down = _FP_W_TYPE_SIZE - _up;                                      \
48     if (!_up)                                                           \
49       for (_i = 3; _i >= _skip; --_i)                                   \
50         X##_f[_i] = X##_f[_i-_skip];                                    \
51     else                                                                \
52       {                                                                 \
53         for (_i = 3; _i > _skip; --_i)                                  \
54           X##_f[_i] = X##_f[_i-_skip] << _up                            \
55                       | X##_f[_i-_skip-1] >> _down;                     \
56         X##_f[_i--] = X##_f[0] << _up;                                  \
57       }                                                                 \
58     for (; _i >= 0; --_i)                                               \
59       X##_f[_i] = 0;                                                    \
60   } while (0)
61
62 /* This one was broken too */
63 #define _FP_FRAC_SRL_4(X,N)                                             \
64   do {                                                                  \
65     _FP_I_TYPE _up, _down, _skip, _i;                                   \
66     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
67     _down = (N) % _FP_W_TYPE_SIZE;                                      \
68     _up = _FP_W_TYPE_SIZE - _down;                                      \
69     if (!_down)                                                         \
70       for (_i = 0; _i <= 3-_skip; ++_i)                                 \
71         X##_f[_i] = X##_f[_i+_skip];                                    \
72     else                                                                \
73       {                                                                 \
74         for (_i = 0; _i < 3-_skip; ++_i)                                \
75           X##_f[_i] = X##_f[_i+_skip] >> _down                          \
76                       | X##_f[_i+_skip+1] << _up;                       \
77         X##_f[_i++] = X##_f[3] >> _down;                                \
78       }                                                                 \
79     for (; _i < 4; ++_i)                                                \
80       X##_f[_i] = 0;                                                    \
81   } while (0)
82
83
84 /* Right shift with sticky-lsb. 
85  * What this actually means is that we do a standard right-shift,
86  * but that if any of the bits that fall off the right hand side
87  * were one then we always set the LSbit.
88  */
89 #define _FP_FRAC_SRST_4(X,S,N,size)                     \
90   do {                                                  \
91     _FP_I_TYPE _up, _down, _skip, _i;                   \
92     _FP_W_TYPE _s;                                      \
93     _skip = (N) / _FP_W_TYPE_SIZE;                      \
94     _down = (N) % _FP_W_TYPE_SIZE;                      \
95     _up = _FP_W_TYPE_SIZE - _down;                      \
96     for (_s = _i = 0; _i < _skip; ++_i)                 \
97       _s |= X##_f[_i];                                  \
98     if (!_down)                                         \
99       for (_i = 0; _i <= 3-_skip; ++_i)                 \
100         X##_f[_i] = X##_f[_i+_skip];                    \
101     else                                                \
102       {                                                 \
103         _s |= X##_f[_i] << _up;                         \
104         for (_i = 0; _i < 3-_skip; ++_i)                \
105           X##_f[_i] = X##_f[_i+_skip] >> _down          \
106                       | X##_f[_i+_skip+1] << _up;       \
107         X##_f[_i++] = X##_f[3] >> _down;                \
108       }                                                 \
109     for (; _i < 4; ++_i)                                \
110       X##_f[_i] = 0;                                    \
111     S = (_s != 0);                                      \
112   } while (0)
113
114 #define _FP_FRAC_SRS_4(X,N,size)                \
115   do {                                          \
116     int _sticky;                                \
117     _FP_FRAC_SRST_4(X, _sticky, N, size);       \
118     X##_f[0] |= _sticky;                        \
119   } while (0)
120
121 #define _FP_FRAC_ADD_4(R,X,Y)                                           \
122   __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
123                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
124                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
125
126 #define _FP_FRAC_SUB_4(R,X,Y)                                           \
127   __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
128                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
129                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
130
131 #define _FP_FRAC_DEC_4(X,Y)                                             \
132   __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
133                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
134
135 #define _FP_FRAC_ADDI_4(X,I)                                            \
136   __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
137
138 #define _FP_ZEROFRAC_4  0,0,0,0
139 #define _FP_MINFRAC_4   0,0,0,1
140 #define _FP_MAXFRAC_4   (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
141
142 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
143 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
144 #define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
145 #define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
146
147 #define _FP_FRAC_EQ_4(X,Y)                              \
148  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
149   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
150
151 #define _FP_FRAC_GT_4(X,Y)                              \
152  (X##_f[3] > Y##_f[3] ||                                \
153   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
154    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
155     (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
156    ))                                                   \
157   ))                                                    \
158  )
159
160 #define _FP_FRAC_GE_4(X,Y)                              \
161  (X##_f[3] > Y##_f[3] ||                                \
162   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
163    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
164     (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
165    ))                                                   \
166   ))                                                    \
167  )
168
169
170 #define _FP_FRAC_CLZ_4(R,X)             \
171   do {                                  \
172     if (X##_f[3])                       \
173     {                                   \
174         __FP_CLZ(R,X##_f[3]);           \
175     }                                   \
176     else if (X##_f[2])                  \
177     {                                   \
178         __FP_CLZ(R,X##_f[2]);           \
179         R += _FP_W_TYPE_SIZE;           \
180     }                                   \
181     else if (X##_f[1])                  \
182     {                                   \
183         __FP_CLZ(R,X##_f[1]);           \
184         R += _FP_W_TYPE_SIZE*2;         \
185     }                                   \
186     else                                \
187     {                                   \
188         __FP_CLZ(R,X##_f[0]);           \
189         R += _FP_W_TYPE_SIZE*3;         \
190     }                                   \
191   } while(0)
192
193
194 #define _FP_UNPACK_RAW_4(fs, X, val)                            \
195   do {                                                          \
196     union _FP_UNION_##fs _flo; _flo.flt = (val);                \
197     X##_f[0] = _flo.bits.frac0;                                 \
198     X##_f[1] = _flo.bits.frac1;                                 \
199     X##_f[2] = _flo.bits.frac2;                                 \
200     X##_f[3] = _flo.bits.frac3;                                 \
201     X##_e  = _flo.bits.exp;                                     \
202     X##_s  = _flo.bits.sign;                                    \
203   } while (0)
204
205 #define _FP_UNPACK_RAW_4_P(fs, X, val)                          \
206   do {                                                          \
207     union _FP_UNION_##fs *_flo =                                \
208       (union _FP_UNION_##fs *)(val);                            \
209                                                                 \
210     X##_f[0] = _flo->bits.frac0;                                \
211     X##_f[1] = _flo->bits.frac1;                                \
212     X##_f[2] = _flo->bits.frac2;                                \
213     X##_f[3] = _flo->bits.frac3;                                \
214     X##_e  = _flo->bits.exp;                                    \
215     X##_s  = _flo->bits.sign;                                   \
216   } while (0)
217
218 #define _FP_PACK_RAW_4(fs, val, X)                              \
219   do {                                                          \
220     union _FP_UNION_##fs _flo;                                  \
221     _flo.bits.frac0 = X##_f[0];                                 \
222     _flo.bits.frac1 = X##_f[1];                                 \
223     _flo.bits.frac2 = X##_f[2];                                 \
224     _flo.bits.frac3 = X##_f[3];                                 \
225     _flo.bits.exp   = X##_e;                                    \
226     _flo.bits.sign  = X##_s;                                    \
227     (val) = _flo.flt;                                           \
228   } while (0)
229
230 #define _FP_PACK_RAW_4_P(fs, val, X)                            \
231   do {                                                          \
232     union _FP_UNION_##fs *_flo =                                \
233       (union _FP_UNION_##fs *)(val);                            \
234                                                                 \
235     _flo->bits.frac0 = X##_f[0];                                \
236     _flo->bits.frac1 = X##_f[1];                                \
237     _flo->bits.frac2 = X##_f[2];                                \
238     _flo->bits.frac3 = X##_f[3];                                \
239     _flo->bits.exp   = X##_e;                                   \
240     _flo->bits.sign  = X##_s;                                   \
241   } while (0)
242
243 /*
244  * Multiplication algorithms:
245  */
246
247 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
248
249 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                       \
250   do {                                                                      \
251     _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);          \
252     _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);          \
253                                                                             \
254     doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
255     doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                                 \
256     doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                                 \
257     doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                                 \
258     doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                                 \
259     doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                                 \
260     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
261                     _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,                   \
262                     0,0,_FP_FRAC_WORD_8(_z,1));                             \
263     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
264                     _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,                   \
265                     _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),            \
266                     _FP_FRAC_WORD_8(_z,1));                                 \
267     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
268                     _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,                   \
269                     0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));         \
270     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
271                     _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,                   \
272                     _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
273                     _FP_FRAC_WORD_8(_z,2));                                 \
274     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
275                     _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,                   \
276                     _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),            \
277                     _FP_FRAC_WORD_8(_z,2));                                 \
278     doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                                 \
279     doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                                 \
280     doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                                 \
281     doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                                 \
282     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
283                     _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,                   \
284                     0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));         \
285     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
286                     _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,                   \
287                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
288                     _FP_FRAC_WORD_8(_z,3));                                 \
289     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
290                     _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,                   \
291                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
292                     _FP_FRAC_WORD_8(_z,3));                                 \
293     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
294                     _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,                   \
295                     _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),            \
296                     _FP_FRAC_WORD_8(_z,3));                                 \
297     doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                                 \
298     doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                                 \
299     doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                                 \
300     doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                                 \
301     doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                                 \
302     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
303                     _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,                   \
304                     0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));         \
305     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
306                     _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,                   \
307                     _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
308                     _FP_FRAC_WORD_8(_z,4));                                 \
309     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
310                     _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,                   \
311                     _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),            \
312                     _FP_FRAC_WORD_8(_z,4));                                 \
313     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
314                     _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,                   \
315                     0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));         \
316     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
317                     _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,                   \
318                     _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
319                     _FP_FRAC_WORD_8(_z,5));                                 \
320     doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                                 \
321     __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),            \
322                     _b_f1,_b_f0,                                            \
323                     _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));           \
324                                                                             \
325     /* Normalize since we know where the msb of the multiplicands           \
326        were (bit B), we know that the msb of the of the product is          \
327        at either 2B or 2B-1.  */                                            \
328     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
329     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
330                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
331   } while (0)
332
333 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                              \
334   do {                                                                      \
335     _FP_FRAC_DECL_8(_z);                                                    \
336                                                                             \
337     mpn_mul_n(_z_f, _x_f, _y_f, 4);                                         \
338                                                                             \
339     /* Normalize since we know where the msb of the multiplicands           \
340        were (bit B), we know that the msb of the of the product is          \
341        at either 2B or 2B-1.  */                                            \
342     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
343     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
344                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
345   } while (0)
346
347 /*
348  * Helper utility for _FP_DIV_MEAT_4_udiv:
349  * pppp = m * nnn
350  */
351 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)                               \
352   do {                                                                      \
353     UWtype _t;                                                              \
354     umul_ppmm(p1,p0,m,n0);                                                  \
355     umul_ppmm(p2,_t,m,n1);                                                  \
356     __FP_FRAC_ADDI_2(p2,p1,_t);                                             \
357     umul_ppmm(p3,_t,m,n2);                                                  \
358     __FP_FRAC_ADDI_2(p3,p2,_t);                                             \
359   } while (0)
360
361 /*
362  * Division algorithms:
363  */
364
365 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                    \
366   do {                                                                      \
367     int _i;                                                                 \
368     _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                               \
369     _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                     \
370     if (_FP_FRAC_GT_4(X, Y))                                                \
371       {                                                                     \
372         _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                        \
373         _FP_FRAC_SRL_4(X, 1);                                               \
374       }                                                                     \
375     else                                                                    \
376       R##_e--;                                                              \
377                                                                             \
378     /* Normalize, i.e. make the most significant bit of the                 \
379        denominator set. */                                                  \
380     _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                                 \
381                                                                             \
382     for (_i = 3; ; _i--)                                                    \
383       {                                                                     \
384         if (X##_f[3] == Y##_f[3])                                           \
385           {                                                                 \
386             /* This is a special case, not an optimization                  \
387                (X##_f[3]/Y##_f[3] would not fit into UWtype).               \
388                As X## is guaranteed to be < Y,  R##_f[_i] can be either     \
389                (UWtype)-1 or (UWtype)-2.  */                                \
390             R##_f[_i] = -1;                                                 \
391             if (!_i)                                                        \
392               break;                                                        \
393             __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],         \
394                             Y##_f[2], Y##_f[1], Y##_f[0], 0,                \
395                             X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);        \
396             _FP_FRAC_SUB_4(X, Y, X);                                        \
397             if (X##_f[3] > Y##_f[3])                                        \
398               {                                                             \
399                 R##_f[_i] = -2;                                             \
400                 _FP_FRAC_ADD_4(X, Y, X);                                    \
401               }                                                             \
402           }                                                                 \
403         else                                                                \
404           {                                                                 \
405             udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
406             umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],               \
407                           R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);         \
408             X##_f[2] = X##_f[1];                                            \
409             X##_f[1] = X##_f[0];                                            \
410             X##_f[0] = _n_f[_i];                                            \
411             if (_FP_FRAC_GT_4(_m, X))                                       \
412               {                                                             \
413                 R##_f[_i]--;                                                \
414                 _FP_FRAC_ADD_4(X, Y, X);                                    \
415                 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))            \
416                   {                                                         \
417                     R##_f[_i]--;                                            \
418                     _FP_FRAC_ADD_4(X, Y, X);                                \
419                   }                                                         \
420               }                                                             \
421             _FP_FRAC_DEC_4(X, _m);                                          \
422             if (!_i)                                                        \
423               {                                                             \
424                 if (!_FP_FRAC_EQ_4(X, _m))                                  \
425                   R##_f[0] |= _FP_WORK_STICKY;                              \
426                 break;                                                      \
427               }                                                             \
428           }                                                                 \
429       }                                                                     \
430   } while (0)
431
432
433 /*
434  * Square root algorithms:
435  * We have just one right now, maybe Newton approximation
436  * should be added for those machines where division is fast.
437  */
438  
439 #define _FP_SQRT_MEAT_4(R, S, T, X, q)                          \
440   do {                                                          \
441     while (q)                                                   \
442       {                                                         \
443         T##_f[3] = S##_f[3] + q;                                \
444         if (T##_f[3] <= X##_f[3])                               \
445           {                                                     \
446             S##_f[3] = T##_f[3] + q;                            \
447             X##_f[3] -= T##_f[3];                               \
448             R##_f[3] += q;                                      \
449           }                                                     \
450         _FP_FRAC_SLL_4(X, 1);                                   \
451         q >>= 1;                                                \
452       }                                                         \
453     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
454     while (q)                                                   \
455       {                                                         \
456         T##_f[2] = S##_f[2] + q;                                \
457         T##_f[3] = S##_f[3];                                    \
458         if (T##_f[3] < X##_f[3] ||                              \
459             (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
460           {                                                     \
461             S##_f[2] = T##_f[2] + q;                            \
462             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
463             __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
464                             T##_f[3], T##_f[2]);                \
465             R##_f[2] += q;                                      \
466           }                                                     \
467         _FP_FRAC_SLL_4(X, 1);                                   \
468         q >>= 1;                                                \
469       }                                                         \
470     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
471     while (q)                                                   \
472       {                                                         \
473         T##_f[1] = S##_f[1] + q;                                \
474         T##_f[2] = S##_f[2];                                    \
475         T##_f[3] = S##_f[3];                                    \
476         if (T##_f[3] < X##_f[3] ||                              \
477             (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
478              (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
479           {                                                     \
480             S##_f[1] = T##_f[1] + q;                            \
481             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
482             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
483             __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
484                             T##_f[3], T##_f[2], T##_f[1]);      \
485             R##_f[1] += q;                                      \
486           }                                                     \
487         _FP_FRAC_SLL_4(X, 1);                                   \
488         q >>= 1;                                                \
489       }                                                         \
490     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
491     while (q != _FP_WORK_ROUND)                                 \
492       {                                                         \
493         T##_f[0] = S##_f[0] + q;                                \
494         T##_f[1] = S##_f[1];                                    \
495         T##_f[2] = S##_f[2];                                    \
496         T##_f[3] = S##_f[3];                                    \
497         if (_FP_FRAC_GE_4(X,T))                                 \
498           {                                                     \
499             S##_f[0] = T##_f[0] + q;                            \
500             S##_f[1] += (T##_f[0] > S##_f[0]);                  \
501             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
502             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
503             _FP_FRAC_DEC_4(X, T);                               \
504             R##_f[0] += q;                                      \
505           }                                                     \
506         _FP_FRAC_SLL_4(X, 1);                                   \
507         q >>= 1;                                                \
508       }                                                         \
509     if (!_FP_FRAC_ZEROP_4(X))                                   \
510       {                                                         \
511         if (_FP_FRAC_GT_4(X,S))                                 \
512           R##_f[0] |= _FP_WORK_ROUND;                           \
513         R##_f[0] |= _FP_WORK_STICKY;                            \
514       }                                                         \
515   } while (0)
516
517
518 /*
519  * Internals 
520  */
521
522 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
523   (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
524
525 #ifndef __FP_FRAC_ADD_3
526 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
527   do {                                                          \
528     _FP_W_TYPE _c1, _c2;                                        \
529     r0 = x0 + y0;                                               \
530     _c1 = r0 < x0;                                              \
531     r1 = x1 + y1;                                               \
532     _c2 = r1 < x1;                                              \
533     r1 += _c1;                                                  \
534     _c2 |= r1 < _c1;                                            \
535     r2 = x2 + y2 + _c2;                                         \
536   } while (0)
537 #endif
538
539 #ifndef __FP_FRAC_ADD_4
540 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
541   do {                                                          \
542     _FP_W_TYPE _c1, _c2, _c3;                                   \
543     r0 = x0 + y0;                                               \
544     _c1 = r0 < x0;                                              \
545     r1 = x1 + y1;                                               \
546     _c2 = r1 < x1;                                              \
547     r1 += _c1;                                                  \
548     _c2 |= r1 < _c1;                                            \
549     r2 = x2 + y2;                                               \
550     _c3 = r2 < x2;                                              \
551     r2 += _c2;                                                  \
552     _c3 |= r2 < _c2;                                            \
553     r3 = x3 + y3 + _c3;                                         \
554   } while (0)
555 #endif
556
557 #ifndef __FP_FRAC_SUB_3
558 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
559   do {                                                          \
560     _FP_W_TYPE _c1, _c2;                                        \
561     r0 = x0 - y0;                                               \
562     _c1 = r0 > x0;                                              \
563     r1 = x1 - y1;                                               \
564     _c2 = r1 > x1;                                              \
565     r1 -= _c1;                                                  \
566     _c2 |= _c1 && (y1 == x1);                                   \
567     r2 = x2 - y2 - _c2;                                         \
568   } while (0)
569 #endif
570
571 #ifndef __FP_FRAC_SUB_4
572 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
573   do {                                                          \
574     _FP_W_TYPE _c1, _c2, _c3;                                   \
575     r0 = x0 - y0;                                               \
576     _c1 = r0 > x0;                                              \
577     r1 = x1 - y1;                                               \
578     _c2 = r1 > x1;                                              \
579     r1 -= _c1;                                                  \
580     _c2 |= _c1 && (y1 == x1);                                   \
581     r2 = x2 - y2;                                               \
582     _c3 = r2 > x2;                                              \
583     r2 -= _c2;                                                  \
584     _c3 |= _c2 && (y2 == x2);                                   \
585     r3 = x3 - y3 - _c3;                                         \
586   } while (0)
587 #endif
588
589 #ifndef __FP_FRAC_DEC_3
590 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                              \
591   do {                                                                  \
592     UWtype _t0, _t1, _t2;                                               \
593     _t0 = x0, _t1 = x1, _t2 = x2;                                       \
594     __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);            \
595   } while (0)
596 #endif
597
598 #ifndef __FP_FRAC_DEC_4
599 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)                        \
600   do {                                                                  \
601     UWtype _t0, _t1, _t2, _t3;                                          \
602     _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                             \
603     __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);         \
604   } while (0)
605 #endif
606
607 #ifndef __FP_FRAC_ADDI_4
608 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
609   do {                                                                  \
610     UWtype _t;                                                          \
611     _t = ((x0 += i) < i);                                               \
612     x1 += _t; _t = (x1 < _t);                                           \
613     x2 += _t; _t = (x2 < _t);                                           \
614     x3 += _t;                                                           \
615   } while (0)
616 #endif
617
618 /* Convert FP values between word sizes. This appears to be more
619  * complicated than I'd have expected it to be, so these might be
620  * wrong... These macros are in any case somewhat bogus because they
621  * use information about what various FRAC_n variables look like 
622  * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
623  * the ones in op-2.h and op-1.h. 
624  */
625 #define _FP_FRAC_COPY_1_4(D, S)         (D##_f = S##_f[0])
626
627 #define _FP_FRAC_COPY_2_4(D, S)                 \
628 do {                                            \
629   D##_f0 = S##_f[0];                            \
630   D##_f1 = S##_f[1];                            \
631 } while (0)
632
633 /* Assembly/disassembly for converting to/from integral types.  
634  * No shifting or overflow handled here.
635  */
636 /* Put the FP value X into r, which is an integer of size rsize. */
637 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
638   do {                                                                  \
639     if (rsize <= _FP_W_TYPE_SIZE)                                       \
640       r = X##_f[0];                                                     \
641     else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
642     {                                                                   \
643       r = X##_f[1];                                                     \
644       r <<= _FP_W_TYPE_SIZE;                                            \
645       r += X##_f[0];                                                    \
646     }                                                                   \
647     else                                                                \
648     {                                                                   \
649       /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
650       /* and int == 4words as a single case.                     */     \
651       r = X##_f[3];                                                     \
652       r <<= _FP_W_TYPE_SIZE;                                            \
653       r += X##_f[2];                                                    \
654       r <<= _FP_W_TYPE_SIZE;                                            \
655       r += X##_f[1];                                                    \
656       r <<= _FP_W_TYPE_SIZE;                                            \
657       r += X##_f[0];                                                    \
658     }                                                                   \
659   } while (0)
660
661 /* "No disassemble Number Five!" */
662 /* move an integer of size rsize into X's fractional part. We rely on
663  * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
664  * having to mask the values we store into it.
665  */
666 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
667   do {                                                                  \
668     X##_f[0] = r;                                                       \
669     X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
670     X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
671     X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
672   } while (0);
673
674 #define _FP_FRAC_COPY_4_1(D, S)                 \
675 do {                                            \
676   D##_f[0] = S##_f;                             \
677   D##_f[1] = D##_f[2] = D##_f[3] = 0;           \
678 } while (0)
679
680 #define _FP_FRAC_COPY_4_2(D, S)                 \
681 do {                                            \
682   D##_f[0] = S##_f0;                            \
683   D##_f[1] = S##_f1;                            \
684   D##_f[2] = D##_f[3] = 0;                      \
685 } while (0)
686
687 #define _FP_FRAC_COPY_4_4(D,S)  _FP_FRAC_COPY_4(D,S)