soft-fp: fix floating-point to integer unsigned saturation.
[platform/upstream/glibc.git] / soft-fp / op-4.h
1 /* Software floating-point emulation.
2    Basic four-word fraction declaration and manipulation.
3    Copyright (C) 1997-2013 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Richard Henderson (rth@cygnus.com),
6                   Jakub Jelinek (jj@ultra.linux.cz),
7                   David S. Miller (davem@redhat.com) and
8                   Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10    The GNU C Library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2.1 of the License, or (at your option) any later version.
14
15    In addition to the permissions in the GNU Lesser General Public
16    License, the Free Software Foundation gives you unlimited
17    permission to link the compiled version of this file into
18    combinations with other programs, and to distribute those
19    combinations without any restriction coming from the use of this
20    file.  (The Lesser General Public License restrictions do apply in
21    other respects; for example, they cover modification of the file,
22    and distribution when not linked into a combine executable.)
23
24    The GNU C Library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with the GNU C Library; if not, see
31    <http://www.gnu.org/licenses/>.  */
32
33 #define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
34 #define _FP_FRAC_COPY_4(D,S)                    \
35   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
36    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
37 #define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
38 #define _FP_FRAC_HIGH_4(X)      (X##_f[3])
39 #define _FP_FRAC_LOW_4(X)       (X##_f[0])
40 #define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
41
42 #define _FP_FRAC_SLL_4(X,N)                                             \
43   do {                                                                  \
44     _FP_I_TYPE _up, _down, _skip, _i;                                   \
45     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
46     _up = (N) % _FP_W_TYPE_SIZE;                                        \
47     _down = _FP_W_TYPE_SIZE - _up;                                      \
48     if (!_up)                                                           \
49       for (_i = 3; _i >= _skip; --_i)                                   \
50         X##_f[_i] = X##_f[_i-_skip];                                    \
51     else                                                                \
52       {                                                                 \
53         for (_i = 3; _i > _skip; --_i)                                  \
54           X##_f[_i] = X##_f[_i-_skip] << _up                            \
55                       | X##_f[_i-_skip-1] >> _down;                     \
56         X##_f[_i--] = X##_f[0] << _up;                                  \
57       }                                                                 \
58     for (; _i >= 0; --_i)                                               \
59       X##_f[_i] = 0;                                                    \
60   } while (0)
61
62 /* This one was broken too */
63 #define _FP_FRAC_SRL_4(X,N)                                             \
64   do {                                                                  \
65     _FP_I_TYPE _up, _down, _skip, _i;                                   \
66     _skip = (N) / _FP_W_TYPE_SIZE;                                      \
67     _down = (N) % _FP_W_TYPE_SIZE;                                      \
68     _up = _FP_W_TYPE_SIZE - _down;                                      \
69     if (!_down)                                                         \
70       for (_i = 0; _i <= 3-_skip; ++_i)                                 \
71         X##_f[_i] = X##_f[_i+_skip];                                    \
72     else                                                                \
73       {                                                                 \
74         for (_i = 0; _i < 3-_skip; ++_i)                                \
75           X##_f[_i] = X##_f[_i+_skip] >> _down                          \
76                       | X##_f[_i+_skip+1] << _up;                       \
77         X##_f[_i++] = X##_f[3] >> _down;                                \
78       }                                                                 \
79     for (; _i < 4; ++_i)                                                \
80       X##_f[_i] = 0;                                                    \
81   } while (0)
82
83
84 /* Right shift with sticky-lsb.
85  * What this actually means is that we do a standard right-shift,
86  * but that if any of the bits that fall off the right hand side
87  * were one then we always set the LSbit.
88  */
89 #define _FP_FRAC_SRST_4(X,S,N,size)                     \
90   do {                                                  \
91     _FP_I_TYPE _up, _down, _skip, _i;                   \
92     _FP_W_TYPE _s;                                      \
93     _skip = (N) / _FP_W_TYPE_SIZE;                      \
94     _down = (N) % _FP_W_TYPE_SIZE;                      \
95     _up = _FP_W_TYPE_SIZE - _down;                      \
96     for (_s = _i = 0; _i < _skip; ++_i)                 \
97       _s |= X##_f[_i];                                  \
98     if (!_down)                                         \
99       for (_i = 0; _i <= 3-_skip; ++_i)                 \
100         X##_f[_i] = X##_f[_i+_skip];                    \
101     else                                                \
102       {                                                 \
103         _s |= X##_f[_i] << _up;                         \
104         for (_i = 0; _i < 3-_skip; ++_i)                \
105           X##_f[_i] = X##_f[_i+_skip] >> _down          \
106                       | X##_f[_i+_skip+1] << _up;       \
107         X##_f[_i++] = X##_f[3] >> _down;                \
108       }                                                 \
109     for (; _i < 4; ++_i)                                \
110       X##_f[_i] = 0;                                    \
111     S = (_s != 0);                                      \
112   } while (0)
113
114 #define _FP_FRAC_SRS_4(X,N,size)                \
115   do {                                          \
116     int _sticky;                                \
117     _FP_FRAC_SRST_4(X, _sticky, N, size);       \
118     X##_f[0] |= _sticky;                        \
119   } while (0)
120
121 #define _FP_FRAC_ADD_4(R,X,Y)                                           \
122   __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
123                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
124                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
125
126 #define _FP_FRAC_SUB_4(R,X,Y)                                           \
127   __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
128                   X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
129                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
130
131 #define _FP_FRAC_DEC_4(X,Y)                                             \
132   __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
133                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
134
135 #define _FP_FRAC_ADDI_4(X,I)                                            \
136   __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
137
138 #define _FP_ZEROFRAC_4  0,0,0,0
139 #define _FP_MINFRAC_4   0,0,0,1
140 #define _FP_MAXFRAC_4   (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
141
142 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
143 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
144 #define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
145 #define _FP_FRAC_HIGHBIT_DW_4(fs,X)     \
146   (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
147 #define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
148
149 #define _FP_FRAC_EQ_4(X,Y)                              \
150  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
151   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
152
153 #define _FP_FRAC_GT_4(X,Y)                              \
154  (X##_f[3] > Y##_f[3] ||                                \
155   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
156    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
157     (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
158    ))                                                   \
159   ))                                                    \
160  )
161
162 #define _FP_FRAC_GE_4(X,Y)                              \
163  (X##_f[3] > Y##_f[3] ||                                \
164   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
165    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
166     (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
167    ))                                                   \
168   ))                                                    \
169  )
170
171
172 #define _FP_FRAC_CLZ_4(R,X)             \
173   do {                                  \
174     if (X##_f[3])                       \
175     {                                   \
176         __FP_CLZ(R,X##_f[3]);           \
177     }                                   \
178     else if (X##_f[2])                  \
179     {                                   \
180         __FP_CLZ(R,X##_f[2]);           \
181         R += _FP_W_TYPE_SIZE;           \
182     }                                   \
183     else if (X##_f[1])                  \
184     {                                   \
185         __FP_CLZ(R,X##_f[1]);           \
186         R += _FP_W_TYPE_SIZE*2;         \
187     }                                   \
188     else                                \
189     {                                   \
190         __FP_CLZ(R,X##_f[0]);           \
191         R += _FP_W_TYPE_SIZE*3;         \
192     }                                   \
193   } while(0)
194
195
196 #define _FP_UNPACK_RAW_4(fs, X, val)                            \
197   do {                                                          \
198     union _FP_UNION_##fs _flo; _flo.flt = (val);                \
199     X##_f[0] = _flo.bits.frac0;                                 \
200     X##_f[1] = _flo.bits.frac1;                                 \
201     X##_f[2] = _flo.bits.frac2;                                 \
202     X##_f[3] = _flo.bits.frac3;                                 \
203     X##_e  = _flo.bits.exp;                                     \
204     X##_s  = _flo.bits.sign;                                    \
205   } while (0)
206
207 #define _FP_UNPACK_RAW_4_P(fs, X, val)                          \
208   do {                                                          \
209     union _FP_UNION_##fs *_flo =                                \
210       (union _FP_UNION_##fs *)(val);                            \
211                                                                 \
212     X##_f[0] = _flo->bits.frac0;                                \
213     X##_f[1] = _flo->bits.frac1;                                \
214     X##_f[2] = _flo->bits.frac2;                                \
215     X##_f[3] = _flo->bits.frac3;                                \
216     X##_e  = _flo->bits.exp;                                    \
217     X##_s  = _flo->bits.sign;                                   \
218   } while (0)
219
220 #define _FP_PACK_RAW_4(fs, val, X)                              \
221   do {                                                          \
222     union _FP_UNION_##fs _flo;                                  \
223     _flo.bits.frac0 = X##_f[0];                                 \
224     _flo.bits.frac1 = X##_f[1];                                 \
225     _flo.bits.frac2 = X##_f[2];                                 \
226     _flo.bits.frac3 = X##_f[3];                                 \
227     _flo.bits.exp   = X##_e;                                    \
228     _flo.bits.sign  = X##_s;                                    \
229     (val) = _flo.flt;                                           \
230   } while (0)
231
232 #define _FP_PACK_RAW_4_P(fs, val, X)                            \
233   do {                                                          \
234     union _FP_UNION_##fs *_flo =                                \
235       (union _FP_UNION_##fs *)(val);                            \
236                                                                 \
237     _flo->bits.frac0 = X##_f[0];                                \
238     _flo->bits.frac1 = X##_f[1];                                \
239     _flo->bits.frac2 = X##_f[2];                                \
240     _flo->bits.frac3 = X##_f[3];                                \
241     _flo->bits.exp   = X##_e;                                   \
242     _flo->bits.sign  = X##_s;                                   \
243   } while (0)
244
245 /*
246  * Multiplication algorithms:
247  */
248
249 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
250
251 #define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit)                    \
252   do {                                                                      \
253     _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);                               \
254     _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);          \
255                                                                             \
256     doit(_FP_FRAC_WORD_8(R,1), _FP_FRAC_WORD_8(R,0), X##_f[0], Y##_f[0]);   \
257     doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                                 \
258     doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                                 \
259     doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                                 \
260     doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                                 \
261     doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                                 \
262     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2),              \
263                     _FP_FRAC_WORD_8(R,1), 0,_b_f1,_b_f0,                    \
264                     0,0,_FP_FRAC_WORD_8(R,1));                              \
265     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2),              \
266                     _FP_FRAC_WORD_8(R,1), 0,_c_f1,_c_f0,                    \
267                     _FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2),              \
268                     _FP_FRAC_WORD_8(R,1));                                  \
269     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
270                     _FP_FRAC_WORD_8(R,2), 0,_d_f1,_d_f0,                    \
271                     0,_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2));           \
272     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
273                     _FP_FRAC_WORD_8(R,2), 0,_e_f1,_e_f0,                    \
274                     _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
275                     _FP_FRAC_WORD_8(R,2));                                  \
276     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
277                     _FP_FRAC_WORD_8(R,2), 0,_f_f1,_f_f0,                    \
278                     _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3),              \
279                     _FP_FRAC_WORD_8(R,2));                                  \
280     doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                                 \
281     doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                                 \
282     doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                                 \
283     doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                                 \
284     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
285                     _FP_FRAC_WORD_8(R,3), 0,_b_f1,_b_f0,                    \
286                     0,_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3));           \
287     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
288                     _FP_FRAC_WORD_8(R,3), 0,_c_f1,_c_f0,                    \
289                     _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
290                     _FP_FRAC_WORD_8(R,3));                                  \
291     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
292                     _FP_FRAC_WORD_8(R,3), 0,_d_f1,_d_f0,                    \
293                     _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
294                     _FP_FRAC_WORD_8(R,3));                                  \
295     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
296                     _FP_FRAC_WORD_8(R,3), 0,_e_f1,_e_f0,                    \
297                     _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4),              \
298                     _FP_FRAC_WORD_8(R,3));                                  \
299     doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                                 \
300     doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                                 \
301     doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                                 \
302     doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                                 \
303     doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                                 \
304     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
305                     _FP_FRAC_WORD_8(R,4), 0,_b_f1,_b_f0,                    \
306                     0,_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4));           \
307     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
308                     _FP_FRAC_WORD_8(R,4), 0,_c_f1,_c_f0,                    \
309                     _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
310                     _FP_FRAC_WORD_8(R,4));                                  \
311     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
312                     _FP_FRAC_WORD_8(R,4), 0,_d_f1,_d_f0,                    \
313                     _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5),              \
314                     _FP_FRAC_WORD_8(R,4));                                  \
315     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),              \
316                     _FP_FRAC_WORD_8(R,5), 0,_e_f1,_e_f0,                    \
317                     0,_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5));           \
318     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),              \
319                     _FP_FRAC_WORD_8(R,5), 0,_f_f1,_f_f0,                    \
320                     _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),              \
321                     _FP_FRAC_WORD_8(R,5));                                  \
322     doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                                 \
323     __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6),              \
324                     _b_f1,_b_f0,                                            \
325                     _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6));             \
326   } while (0)
327
328 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                       \
329   do {                                                                      \
330     _FP_FRAC_DECL_8(_z);                                                    \
331                                                                             \
332     _FP_MUL_MEAT_DW_4_wide(wfracbits, _z, X, Y, doit);                      \
333                                                                             \
334     /* Normalize since we know where the msb of the multiplicands           \
335        were (bit B), we know that the msb of the of the product is          \
336        at either 2B or 2B-1.  */                                            \
337     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
338     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
339                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
340   } while (0)
341
342 #define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y)                           \
343   do {                                                                      \
344     mpn_mul_n(R##_f, _x_f, _y_f, 4);                                        \
345   } while (0)
346
347 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                              \
348   do {                                                                      \
349     _FP_FRAC_DECL_8(_z);                                                    \
350                                                                             \
351     _FP_MUL_MEAT_DW_4_gmp(wfracbits, _z, X, Y);                             \
352                                                                             \
353     /* Normalize since we know where the msb of the multiplicands           \
354        were (bit B), we know that the msb of the of the product is          \
355        at either 2B or 2B-1.  */                                            \
356     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
357     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
358                     _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
359   } while (0)
360
361 /*
362  * Helper utility for _FP_DIV_MEAT_4_udiv:
363  * pppp = m * nnn
364  */
365 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)                               \
366   do {                                                                      \
367     UWtype _t;                                                              \
368     umul_ppmm(p1,p0,m,n0);                                                  \
369     umul_ppmm(p2,_t,m,n1);                                                  \
370     __FP_FRAC_ADDI_2(p2,p1,_t);                                             \
371     umul_ppmm(p3,_t,m,n2);                                                  \
372     __FP_FRAC_ADDI_2(p3,p2,_t);                                             \
373   } while (0)
374
375 /*
376  * Division algorithms:
377  */
378
379 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                                    \
380   do {                                                                      \
381     int _i;                                                                 \
382     _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                               \
383     _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                     \
384     if (_FP_FRAC_GT_4(X, Y))                                                \
385       {                                                                     \
386         _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                        \
387         _FP_FRAC_SRL_4(X, 1);                                               \
388       }                                                                     \
389     else                                                                    \
390       R##_e--;                                                              \
391                                                                             \
392     /* Normalize, i.e. make the most significant bit of the                 \
393        denominator set. */                                                  \
394     _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                                 \
395                                                                             \
396     for (_i = 3; ; _i--)                                                    \
397       {                                                                     \
398         if (X##_f[3] == Y##_f[3])                                           \
399           {                                                                 \
400             /* This is a special case, not an optimization                  \
401                (X##_f[3]/Y##_f[3] would not fit into UWtype).               \
402                As X## is guaranteed to be < Y,  R##_f[_i] can be either     \
403                (UWtype)-1 or (UWtype)-2.  */                                \
404             R##_f[_i] = -1;                                                 \
405             if (!_i)                                                        \
406               break;                                                        \
407             __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],         \
408                             Y##_f[2], Y##_f[1], Y##_f[0], 0,                \
409                             X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);        \
410             _FP_FRAC_SUB_4(X, Y, X);                                        \
411             if (X##_f[3] > Y##_f[3])                                        \
412               {                                                             \
413                 R##_f[_i] = -2;                                             \
414                 _FP_FRAC_ADD_4(X, Y, X);                                    \
415               }                                                             \
416           }                                                                 \
417         else                                                                \
418           {                                                                 \
419             udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
420             umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],               \
421                           R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);         \
422             X##_f[2] = X##_f[1];                                            \
423             X##_f[1] = X##_f[0];                                            \
424             X##_f[0] = _n_f[_i];                                            \
425             if (_FP_FRAC_GT_4(_m, X))                                       \
426               {                                                             \
427                 R##_f[_i]--;                                                \
428                 _FP_FRAC_ADD_4(X, Y, X);                                    \
429                 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))            \
430                   {                                                         \
431                     R##_f[_i]--;                                            \
432                     _FP_FRAC_ADD_4(X, Y, X);                                \
433                   }                                                         \
434               }                                                             \
435             _FP_FRAC_DEC_4(X, _m);                                          \
436             if (!_i)                                                        \
437               {                                                             \
438                 if (!_FP_FRAC_EQ_4(X, _m))                                  \
439                   R##_f[0] |= _FP_WORK_STICKY;                              \
440                 break;                                                      \
441               }                                                             \
442           }                                                                 \
443       }                                                                     \
444   } while (0)
445
446
447 /*
448  * Square root algorithms:
449  * We have just one right now, maybe Newton approximation
450  * should be added for those machines where division is fast.
451  */
452
453 #define _FP_SQRT_MEAT_4(R, S, T, X, q)                          \
454   do {                                                          \
455     while (q)                                                   \
456       {                                                         \
457         T##_f[3] = S##_f[3] + q;                                \
458         if (T##_f[3] <= X##_f[3])                               \
459           {                                                     \
460             S##_f[3] = T##_f[3] + q;                            \
461             X##_f[3] -= T##_f[3];                               \
462             R##_f[3] += q;                                      \
463           }                                                     \
464         _FP_FRAC_SLL_4(X, 1);                                   \
465         q >>= 1;                                                \
466       }                                                         \
467     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
468     while (q)                                                   \
469       {                                                         \
470         T##_f[2] = S##_f[2] + q;                                \
471         T##_f[3] = S##_f[3];                                    \
472         if (T##_f[3] < X##_f[3] ||                              \
473             (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
474           {                                                     \
475             S##_f[2] = T##_f[2] + q;                            \
476             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
477             __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
478                             T##_f[3], T##_f[2]);                \
479             R##_f[2] += q;                                      \
480           }                                                     \
481         _FP_FRAC_SLL_4(X, 1);                                   \
482         q >>= 1;                                                \
483       }                                                         \
484     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
485     while (q)                                                   \
486       {                                                         \
487         T##_f[1] = S##_f[1] + q;                                \
488         T##_f[2] = S##_f[2];                                    \
489         T##_f[3] = S##_f[3];                                    \
490         if (T##_f[3] < X##_f[3] ||                              \
491             (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
492              (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
493           {                                                     \
494             S##_f[1] = T##_f[1] + q;                            \
495             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
496             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
497             __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
498                             T##_f[3], T##_f[2], T##_f[1]);      \
499             R##_f[1] += q;                                      \
500           }                                                     \
501         _FP_FRAC_SLL_4(X, 1);                                   \
502         q >>= 1;                                                \
503       }                                                         \
504     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                 \
505     while (q != _FP_WORK_ROUND)                                 \
506       {                                                         \
507         T##_f[0] = S##_f[0] + q;                                \
508         T##_f[1] = S##_f[1];                                    \
509         T##_f[2] = S##_f[2];                                    \
510         T##_f[3] = S##_f[3];                                    \
511         if (_FP_FRAC_GE_4(X,T))                                 \
512           {                                                     \
513             S##_f[0] = T##_f[0] + q;                            \
514             S##_f[1] += (T##_f[0] > S##_f[0]);                  \
515             S##_f[2] += (T##_f[1] > S##_f[1]);                  \
516             S##_f[3] += (T##_f[2] > S##_f[2]);                  \
517             _FP_FRAC_DEC_4(X, T);                               \
518             R##_f[0] += q;                                      \
519           }                                                     \
520         _FP_FRAC_SLL_4(X, 1);                                   \
521         q >>= 1;                                                \
522       }                                                         \
523     if (!_FP_FRAC_ZEROP_4(X))                                   \
524       {                                                         \
525         if (_FP_FRAC_GT_4(X,S))                                 \
526           R##_f[0] |= _FP_WORK_ROUND;                           \
527         R##_f[0] |= _FP_WORK_STICKY;                            \
528       }                                                         \
529   } while (0)
530
531
532 /*
533  * Internals
534  */
535
536 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
537   (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
538
539 #ifndef __FP_FRAC_ADD_3
540 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
541   do {                                                          \
542     _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2;          \
543     r0 = x0 + y0;                                               \
544     __FP_FRAC_ADD_3_c1 = r0 < x0;                               \
545     r1 = x1 + y1;                                               \
546     __FP_FRAC_ADD_3_c2 = r1 < x1;                               \
547     r1 += __FP_FRAC_ADD_3_c1;                                   \
548     __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1;              \
549     r2 = x2 + y2 + __FP_FRAC_ADD_3_c2;                          \
550   } while (0)
551 #endif
552
553 #ifndef __FP_FRAC_ADD_4
554 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
555   do {                                                          \
556     _FP_W_TYPE _c1, _c2, _c3;                                   \
557     r0 = x0 + y0;                                               \
558     _c1 = r0 < x0;                                              \
559     r1 = x1 + y1;                                               \
560     _c2 = r1 < x1;                                              \
561     r1 += _c1;                                                  \
562     _c2 |= r1 < _c1;                                            \
563     r2 = x2 + y2;                                               \
564     _c3 = r2 < x2;                                              \
565     r2 += _c2;                                                  \
566     _c3 |= r2 < _c2;                                            \
567     r3 = x3 + y3 + _c3;                                         \
568   } while (0)
569 #endif
570
571 #ifndef __FP_FRAC_SUB_3
572 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)             \
573   do {                                                          \
574     _FP_W_TYPE _c1, _c2;                                        \
575     r0 = x0 - y0;                                               \
576     _c1 = r0 > x0;                                              \
577     r1 = x1 - y1;                                               \
578     _c2 = r1 > x1;                                              \
579     r1 -= _c1;                                                  \
580     _c2 |= _c1 && (y1 == x1);                                   \
581     r2 = x2 - y2 - _c2;                                         \
582   } while (0)
583 #endif
584
585 #ifndef __FP_FRAC_SUB_4
586 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)    \
587   do {                                                          \
588     _FP_W_TYPE _c1, _c2, _c3;                                   \
589     r0 = x0 - y0;                                               \
590     _c1 = r0 > x0;                                              \
591     r1 = x1 - y1;                                               \
592     _c2 = r1 > x1;                                              \
593     r1 -= _c1;                                                  \
594     _c2 |= _c1 && (y1 == x1);                                   \
595     r2 = x2 - y2;                                               \
596     _c3 = r2 > x2;                                              \
597     r2 -= _c2;                                                  \
598     _c3 |= _c2 && (y2 == x2);                                   \
599     r3 = x3 - y3 - _c3;                                         \
600   } while (0)
601 #endif
602
603 #ifndef __FP_FRAC_DEC_3
604 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                              \
605   do {                                                                  \
606     UWtype _t0, _t1, _t2;                                               \
607     _t0 = x0, _t1 = x1, _t2 = x2;                                       \
608     __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);            \
609   } while (0)
610 #endif
611
612 #ifndef __FP_FRAC_DEC_4
613 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)                        \
614   do {                                                                  \
615     UWtype _t0, _t1, _t2, _t3;                                          \
616     _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                             \
617     __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);         \
618   } while (0)
619 #endif
620
621 #ifndef __FP_FRAC_ADDI_4
622 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
623   do {                                                                  \
624     UWtype _t;                                                          \
625     _t = ((x0 += i) < i);                                               \
626     x1 += _t; _t = (x1 < _t);                                           \
627     x2 += _t; _t = (x2 < _t);                                           \
628     x3 += _t;                                                           \
629   } while (0)
630 #endif
631
632 /* Convert FP values between word sizes. This appears to be more
633  * complicated than I'd have expected it to be, so these might be
634  * wrong... These macros are in any case somewhat bogus because they
635  * use information about what various FRAC_n variables look like
636  * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
637  * the ones in op-2.h and op-1.h.
638  */
639 #define _FP_FRAC_COPY_1_4(D, S)         (D##_f = S##_f[0])
640
641 #define _FP_FRAC_COPY_2_4(D, S)                 \
642 do {                                            \
643   D##_f0 = S##_f[0];                            \
644   D##_f1 = S##_f[1];                            \
645 } while (0)
646
647 /* Assembly/disassembly for converting to/from integral types.
648  * No shifting or overflow handled here.
649  */
650 /* Put the FP value X into r, which is an integer of size rsize. */
651 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
652   do {                                                                  \
653     if (rsize <= _FP_W_TYPE_SIZE)                                       \
654       r = X##_f[0];                                                     \
655     else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
656     {                                                                   \
657       r = X##_f[1];                                                     \
658       r <<= _FP_W_TYPE_SIZE;                                            \
659       r += X##_f[0];                                                    \
660     }                                                                   \
661     else                                                                \
662     {                                                                   \
663       /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
664       /* and int == 4words as a single case.                     */     \
665       r = X##_f[3];                                                     \
666       r <<= _FP_W_TYPE_SIZE;                                            \
667       r += X##_f[2];                                                    \
668       r <<= _FP_W_TYPE_SIZE;                                            \
669       r += X##_f[1];                                                    \
670       r <<= _FP_W_TYPE_SIZE;                                            \
671       r += X##_f[0];                                                    \
672     }                                                                   \
673   } while (0)
674
675 /* "No disassemble Number Five!" */
676 /* move an integer of size rsize into X's fractional part. We rely on
677  * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
678  * having to mask the values we store into it.
679  */
680 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
681   do {                                                                  \
682     X##_f[0] = r;                                                       \
683     X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
684     X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
685     X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
686   } while (0)
687
688 #define _FP_FRAC_COPY_4_1(D, S)                 \
689 do {                                            \
690   D##_f[0] = S##_f;                             \
691   D##_f[1] = D##_f[2] = D##_f[3] = 0;           \
692 } while (0)
693
694 #define _FP_FRAC_COPY_4_2(D, S)                 \
695 do {                                            \
696   D##_f[0] = S##_f0;                            \
697   D##_f[1] = S##_f1;                            \
698   D##_f[2] = D##_f[3] = 0;                      \
699 } while (0)
700
701 #define _FP_FRAC_COPY_4_4(D,S)  _FP_FRAC_COPY_4(D,S)