/* Software floating-point emulation.
   Basic four-word fraction declaration and manipulation.
   Copyright (C) 1997-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@cygnus.com),
		  Jakub Jelinek (jj@ultra.linux.cz),
		  David S. Miller (davem@redhat.com) and
		  Peter Maydell (pmaydell@chiark.greenend.org.uk).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* Declare the four-word fraction X as the array X_f[4].  Word 0 is
   the least significant word and word 3 the most significant.  */
#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]

/* Copy all four words of fraction S into fraction D.  */
#define _FP_FRAC_COPY_4(D, S) \
  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])

/* Set X from I, a comma-separated list of four words given most
   significant first (e.g. _FP_MINFRAC_4).  */
#define _FP_FRAC_SET_4(X, I) __FP_FRAC_SET_4 (X, I)

/* Most significant word, least significant word and word W of X.  */
#define _FP_FRAC_HIGH_4(X) (X##_f[3])
#define _FP_FRAC_LOW_4(X) (X##_f[0])
#define _FP_FRAC_WORD_4(X, w) (X##_f[w])

/* Shift the four-word fraction X left by N bits in place.  Whole
   words are moved by N / _FP_W_TYPE_SIZE positions, the remaining
   N % _FP_W_TYPE_SIZE bits are carried across word boundaries, and
   the vacated low words are cleared.  _FP_I_TYPE must be signed: the
   final loop counts the index down past zero.  */
#define _FP_FRAC_SLL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
      if (!_FP_FRAC_SLL_4_up) \
	/* Word-aligned shift: avoids shifting by a full word width.  */ \
	for (_FP_FRAC_SLL_4_i = 3; \
	     _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
	     --_FP_FRAC_SLL_4_i) \
	  X##_f[_FP_FRAC_SLL_4_i] \
	    = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
      else \
	{ \
	  for (_FP_FRAC_SLL_4_i = 3; \
	       _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
	       --_FP_FRAC_SLL_4_i) \
	    X##_f[_FP_FRAC_SLL_4_i] \
	      = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
		  << _FP_FRAC_SLL_4_up) \
		 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
		    >> _FP_FRAC_SLL_4_down)); \
	  /* Lowest surviving word has nothing below it to pull in.  */ \
	  X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
	} \
      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
	X##_f[_FP_FRAC_SLL_4_i] = 0; \
    } \
  while (0)

/* This one was broken too.  */
/* Shift the four-word fraction X right (logically) by N bits in
   place.  Whole words are moved by N / _FP_W_TYPE_SIZE positions, the
   remaining bits are carried across word boundaries, and the vacated
   high words are cleared.  Bits shifted out are discarded.  */
#define _FP_FRAC_SRL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
      if (!_FP_FRAC_SRL_4_down) \
	/* Word-aligned shift: avoids shifting by a full word width.  */ \
	for (_FP_FRAC_SRL_4_i = 0; \
	     _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
	     ++_FP_FRAC_SRL_4_i) \
	  X##_f[_FP_FRAC_SRL_4_i] \
	    = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
      else \
	{ \
	  for (_FP_FRAC_SRL_4_i = 0; \
	       _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
	       ++_FP_FRAC_SRL_4_i) \
	    X##_f[_FP_FRAC_SRL_4_i] \
	      = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
		  >> _FP_FRAC_SRL_4_down) \
		 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
		    << _FP_FRAC_SRL_4_up)); \
	  /* Highest surviving word has nothing above it to pull in.  */ \
	  X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
	} \
      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
	X##_f[_FP_FRAC_SRL_4_i] = 0; \
    } \
  while (0)


/* Right shift with sticky-lsb.
   What this actually means is that we do a standard right-shift,
   but that if any of the bits that fall off the right hand side
   were one then we always set the LSbit.  */
/* Shift X right by N bits in place and set S to 1 if any nonzero bit
   was shifted out, 0 otherwise.  This macro only reports the sticky
   state in S; it does not itself OR it into X (see _FP_FRAC_SRS_4).
   SIZE is not used here.  */
#define _FP_FRAC_SRST_4(X, S, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
      _FP_W_TYPE _FP_FRAC_SRST_4_s; \
      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
      /* Collect the words that are shifted out entirely.  */ \
      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
	   _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
	   ++_FP_FRAC_SRST_4_i) \
	_FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
      if (!_FP_FRAC_SRST_4_down) \
	for (_FP_FRAC_SRST_4_i = 0; \
	     _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
	     ++_FP_FRAC_SRST_4_i) \
	  X##_f[_FP_FRAC_SRST_4_i] \
	    = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
      else \
	{ \
	  /* Here the index equals SKIP: add the low bits of the \
	     first surviving word that fall off the bottom.  */ \
	  _FP_FRAC_SRST_4_s \
	    |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
	  for (_FP_FRAC_SRST_4_i = 0; \
	       _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
	       ++_FP_FRAC_SRST_4_i) \
	    X##_f[_FP_FRAC_SRST_4_i] \
	      = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
		  >> _FP_FRAC_SRST_4_down) \
		 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
		    << _FP_FRAC_SRST_4_up)); \
	  X##_f[_FP_FRAC_SRST_4_i++] \
	    = X##_f[3] >> _FP_FRAC_SRST_4_down; \
	} \
      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
	X##_f[_FP_FRAC_SRST_4_i] = 0; \
      S = (_FP_FRAC_SRST_4_s != 0); \
    } \
  while (0)

/* Shift X right by N bits in place with sticky LSB: the sticky flag
   produced by _FP_FRAC_SRST_4 is ORed into the lowest word.  SIZE is
   passed through unchanged.  */
#define _FP_FRAC_SRS_4(X, N, size) \
  do \
    { \
      int _FP_FRAC_SRS_4_sticky; \
      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, N, size); \
      X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
    } \
  while (0)

/* R = X + Y on four-word fractions.  */
#define _FP_FRAC_ADD_4(R, X, Y) \
  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
		   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
		   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y on four-word fractions.  */
#define _FP_FRAC_SUB_4(R, X, Y) \
  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
		   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
		   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X -= Y on four-word fractions.  */
#define _FP_FRAC_DEC_4(X, Y) \
  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
		   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X += I, where I is a single word.  */
#define _FP_FRAC_ADDI_4(X, I) \
  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)

/* Four-word constants as initializer lists, most significant word
   first (the order __FP_FRAC_SET_4 takes them in).  */
#define _FP_ZEROFRAC_4 0, 0, 0, 0
#define _FP_MINFRAC_4 0, 0, 0, 1
#define _FP_MAXFRAC_4 \
  (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), \
  (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* Nonzero iff every word of X is zero.  */
#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
/* Nonzero iff the top word of X is negative when viewed as signed.  */
#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE) X##_f[3] < 0)
/* Test / clear the format-specific overflow bit(s) in the high word;
   the masks and high-word accessors are supplied per format FS.  */
#define _FP_FRAC_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
#define _FP_FRAC_CLEAR_OVERP_4(fs, X) \
  (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)

/* Nonzero iff X and Y are equal in all four words.  */
#define _FP_FRAC_EQ_4(X, Y) \
  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])

/* Nonzero iff X > Y, comparing from the most significant word down.  */
#define _FP_FRAC_GT_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
	   || (X##_f[2] == Y##_f[2] \
	       && (X##_f[1] > Y##_f[1] \
		   || (X##_f[1] == Y##_f[1] \
		       && X##_f[0] > Y##_f[0]))))))

/* Nonzero iff X >= Y; differs from _FP_FRAC_GT_4 only in the final
   (least significant) word comparison.  */
#define _FP_FRAC_GE_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
	   || (X##_f[2] == Y##_f[2] \
	       && (X##_f[1] > Y##_f[1] \
		   || (X##_f[1] == Y##_f[1] \
		       && X##_f[0] >= Y##_f[0]))))))


/* Store in R the number of leading zero bits of the four-word
   fraction X: __FP_CLZ is applied to the most significant nonzero
   word and one word width is added for every all-zero word above it.
   If X is entirely zero, __FP_CLZ is applied to the zero low word, so
   the result then depends on what __FP_CLZ does for 0.  */
#define _FP_FRAC_CLZ_4(R, X) \
  do \
    { \
      if (X##_f[3]) \
	__FP_CLZ (R, X##_f[3]); \
      else if (X##_f[2]) \
	{ \
	  __FP_CLZ (R, X##_f[2]); \
	  R += _FP_W_TYPE_SIZE; \
	} \
      else if (X##_f[1]) \
	{ \
	  __FP_CLZ (R, X##_f[1]); \
	  R += _FP_W_TYPE_SIZE*2; \
	} \
      else \
	{ \
	  __FP_CLZ (R, X##_f[0]); \
	  R += _FP_W_TYPE_SIZE*3; \
	} \
    } \
  while (0)


/* Split the floating-point value VAL of format FS into X's fraction
   words (X_f[0..3]), exponent (X_e) and sign (X_s) by viewing it
   through union _FP_UNION_<fs>.  No bias or implicit-bit handling is
   done here.  */
#define _FP_UNPACK_RAW_4(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
      _FP_UNPACK_RAW_4_flo.flt = (val); \
      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
    } \
  while (0)

/* As _FP_UNPACK_RAW_4, but VAL is a pointer to the value, which is
   read in place through a union _FP_UNION_<fs> pointer.  */
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
    } \
  while (0)

/* Assemble X's fraction words, exponent (X_e) and sign (X_s) into a
   value of format FS via union _FP_UNION_<fs> and assign it to VAL.  */
#define _FP_PACK_RAW_4(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
      (val) = _FP_PACK_RAW_4_flo.flt; \
    } \
  while (0)

/* As _FP_PACK_RAW_4, but VAL is a pointer to the destination, which
   is written in place through a union _FP_UNION_<fs> pointer.  */
#define _FP_PACK_RAW_4_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
    } \
  while (0)

/* Multiplication algorithms: */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */

/* Compute the full eight-word product of the four-word fractions X
   and Y into the eight-word fraction R.  DOIT (hi, lo, a, b) must
   store the double-word product a * b in HI:LO.  The sixteen partial
   products are accumulated column by column, lowest first, with each
   column's carry landing in the next higher word of R.  WFRACBITS is
   not used here.  */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
 \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
	    X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
	    X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
	    X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
	    X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
	    X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
	    X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
		       _FP_FRAC_WORD_8 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_4_wide_b_f1, \
		       _FP_MUL_MEAT_DW_4_wide_b_f0, \
		       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
		       _FP_FRAC_WORD_8 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_4_wide_c_f1, \
		       _FP_MUL_MEAT_DW_4_wide_c_f0, \
		       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
		       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
		       _FP_FRAC_WORD_8 (R, 2), 0, \
		       _FP_MUL_MEAT_DW_4_wide_d_f1, \
		       _FP_MUL_MEAT_DW_4_wide_d_f0, \
		       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
		       _FP_FRAC_WORD_8 (R, 2), 0, \
		       _FP_MUL_MEAT_DW_4_wide_e_f1, \
		       _FP_MUL_MEAT_DW_4_wide_e_f0, \
		       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
		       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
		       _FP_FRAC_WORD_8 (R, 2), 0, \
		       _FP_MUL_MEAT_DW_4_wide_f_f1, \
		       _FP_MUL_MEAT_DW_4_wide_f_f0, \
		       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
		       _FP_FRAC_WORD_8 (R, 2)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
	    _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
	    _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
	    X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
	    X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3), 0, \
		       _FP_MUL_MEAT_DW_4_wide_b_f1, \
		       _FP_MUL_MEAT_DW_4_wide_b_f0, \
		       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3), 0, \
		       _FP_MUL_MEAT_DW_4_wide_c_f1, \
		       _FP_MUL_MEAT_DW_4_wide_c_f0, \
		       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3), 0, \
		       _FP_MUL_MEAT_DW_4_wide_d_f1, \
		       _FP_MUL_MEAT_DW_4_wide_d_f0, \
		       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3), 0, \
		       _FP_MUL_MEAT_DW_4_wide_e_f1, \
		       _FP_MUL_MEAT_DW_4_wide_e_f0, \
		       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
		       _FP_FRAC_WORD_8 (R, 3)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
	    X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
	    X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
	    X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
	    X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
	    X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
		       _FP_FRAC_WORD_8 (R, 4), 0, \
		       _FP_MUL_MEAT_DW_4_wide_b_f1, \
		       _FP_MUL_MEAT_DW_4_wide_b_f0, \
		       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
		       _FP_FRAC_WORD_8 (R, 4), 0, \
		       _FP_MUL_MEAT_DW_4_wide_c_f1, \
		       _FP_MUL_MEAT_DW_4_wide_c_f0, \
		       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
		       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
		       _FP_FRAC_WORD_8 (R, 4), 0, \
		       _FP_MUL_MEAT_DW_4_wide_d_f1, \
		       _FP_MUL_MEAT_DW_4_wide_d_f0, \
		       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
		       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
		       _FP_FRAC_WORD_8 (R, 5), 0, \
		       _FP_MUL_MEAT_DW_4_wide_e_f1, \
		       _FP_MUL_MEAT_DW_4_wide_e_f0, \
		       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
		       _FP_FRAC_WORD_8 (R, 5), 0, \
		       _FP_MUL_MEAT_DW_4_wide_f_f1, \
		       _FP_MUL_MEAT_DW_4_wide_f_f0, \
		       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
		       _FP_FRAC_WORD_8 (R, 5)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
	    X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
		       _FP_MUL_MEAT_DW_4_wide_b_f1, \
		       _FP_MUL_MEAT_DW_4_wide_b_f0, \
		       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)

/* Multiply the four-word fractions X and Y into the four-word
   fraction R: form the eight-word product with
   _FP_MUL_MEAT_DW_4_wide, shift it right by WFRACBITS-1 with sticky
   LSB, and keep the low four words.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
 \
      _FP_MUL_MEAT_DW_4_wide (wfracbits, _FP_MUL_MEAT_4_wide_z, \
			      X, Y, doit); \
 \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, wfracbits-1, 2*wfracbits); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)

/* Compute the full eight-word product of the four-word fractions X
   and Y into R using GMP's mpn_mul_n.  The operands are taken from
   the macro's own X and Y arguments; the previous text hard-coded
   the names _x_f and _y_f and silently ignored the arguments.
   WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      mpn_mul_n (R##_f, X##_f, Y##_f, 4); \
    } \
  while (0)

/* Multiply the four-word fractions X and Y into the four-word
   fraction R: form the eight-word product with _FP_MUL_MEAT_DW_4_gmp,
   shift it right by WFRACBITS-1 with sticky LSB, and keep the low
   four words.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
 \
      _FP_MUL_MEAT_DW_4_gmp (wfracbits, _FP_MUL_MEAT_4_gmp_z, X, Y); \
 \
      /* Normalize since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, wfracbits-1, 2*wfracbits); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)

/* Helper utility for _FP_DIV_MEAT_4_udiv:
 * pppp = m * nnn.  */
/* Multiply the three-word number N2:N1:N0 by the single word M and
   store the four-word product in P3:P2:P1:P0.  Each partial product
   comes from umul_ppmm; its low half is added into the word below
   with carry into the word being written.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)

/* Division algorithms: */

/* Divide the four-word fraction X by Y, producing the quotient words
   of R one at a time from R_f[3] down to R_f[0].  Each word is
   estimated with udiv_qrnnd on the top words, the estimate is checked
   against the full product computed by umul_ppppmnnn, and corrected
   downward by at most two.  A nonzero final remainder sets
   _FP_WORK_STICKY in R_f[0].  X and Y are both modified.
   NOTE(review): the `else R##_e--;', the two `break;' statements and
   the `X##_f[0]' left-hand side sat on lines that were missing from
   the damaged text this was restored from; confirm them against the
   upstream header.  */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
  do \
    { \
      int _FP_DIV_MEAT_4_udiv_i; \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
      if (_FP_FRAC_GE_4 (X, Y)) \
	{ \
	  /* Keep the bit shifted out of X so it can be fed back in \
	     as a low dividend word later.  */ \
	  _FP_DIV_MEAT_4_udiv_n_f[3] \
	    = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
	  _FP_FRAC_SRL_4 (X, 1); \
	} \
      else \
	R##_e--; \
 \
      /* Normalize, i.e. make the most significant bit of the \
	 denominator set.  */ \
      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
 \
      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
	{ \
	  if (X##_f[3] == Y##_f[3]) \
	    { \
	      /* This is a special case, not an optimization \
		 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
		 As X## is guaranteed to be < Y, \
		 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
		 (UWtype)-1 or (UWtype)-2.  */ \
	      R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
	      if (!_FP_DIV_MEAT_4_udiv_i) \
		break; \
	      __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
			       Y##_f[2], Y##_f[1], Y##_f[0], 0, \
			       X##_f[2], X##_f[1], X##_f[0], \
			       _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
	      _FP_FRAC_SUB_4 (X, Y, X); \
	      if (X##_f[3] > Y##_f[3]) \
		{ \
		  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
		  _FP_FRAC_ADD_4 (X, Y, X); \
		} \
	    } \
	  else \
	    { \
	      udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
			  X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
	      umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
			     _FP_DIV_MEAT_4_udiv_m_f[2], \
			     _FP_DIV_MEAT_4_udiv_m_f[1], \
			     _FP_DIV_MEAT_4_udiv_m_f[0], \
			     R##_f[_FP_DIV_MEAT_4_udiv_i], \
			     Y##_f[2], Y##_f[1], Y##_f[0]); \
	      X##_f[2] = X##_f[1]; \
	      X##_f[1] = X##_f[0]; \
	      X##_f[0] \
		= _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
	      if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
		{ \
		  R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
		  _FP_FRAC_ADD_4 (X, Y, X); \
		  if (_FP_FRAC_GE_4 (X, Y) \
		      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
		    { \
		      R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
		      _FP_FRAC_ADD_4 (X, Y, X); \
		    } \
		} \
	      _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
	      if (!_FP_DIV_MEAT_4_udiv_i) \
		{ \
		  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
		    R##_f[0] |= _FP_WORK_STICKY; \
		  break; \
		} \
	    } \
	} \
    } \
  while (0)


/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root step over a four-word fraction.  Q is the
   trial bit; it walks down through word 3, then words 2, 1 and 0,
   being reset to the top bit of a word before each lower word.  For
   each bit, T = S + Q is compared with the running remainder X; when
   T fits, S and X are updated and Q is added to the result word.  X
   is shifted left by one after every bit.  The last loop stops when Q
   reaches _FP_WORK_ROUND; a nonzero remainder then sets
   _FP_WORK_STICKY (and _FP_WORK_ROUND if X > S) in R_f[0].
   NOTE(review): the `while (q)' loop heads, `R##_f[n] += q;' and
   `q >>= 1;' sat on lines that were missing from the damaged text
   this was restored from; confirm them against the upstream header.  */
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
  do \
    { \
      while (q) \
	{ \
	  T##_f[3] = S##_f[3] + q; \
	  if (T##_f[3] <= X##_f[3]) \
	    { \
	      S##_f[3] = T##_f[3] + q; \
	      X##_f[3] -= T##_f[3]; \
	      R##_f[3] += q; \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  q >>= 1; \
	} \
      q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
	{ \
	  T##_f[2] = S##_f[2] + q; \
	  T##_f[3] = S##_f[3]; \
	  if (T##_f[3] < X##_f[3] \
	      || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
	    { \
	      S##_f[2] = T##_f[2] + q; \
	      /* Carry into the next word if S##_f[2] wrapped.  */ \
	      S##_f[3] += (T##_f[2] > S##_f[2]); \
	      __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
			       T##_f[3], T##_f[2]); \
	      R##_f[2] += q; \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  q >>= 1; \
	} \
      q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
	{ \
	  T##_f[1] = S##_f[1] + q; \
	  T##_f[2] = S##_f[2]; \
	  T##_f[3] = S##_f[3]; \
	  if (T##_f[3] < X##_f[3] \
	      || (T##_f[3] == X##_f[3] \
		  && (T##_f[2] < X##_f[2] \
		      || (T##_f[2] == X##_f[2] \
			  && T##_f[1] <= X##_f[1])))) \
	    { \
	      S##_f[1] = T##_f[1] + q; \
	      S##_f[2] += (T##_f[1] > S##_f[1]); \
	      S##_f[3] += (T##_f[2] > S##_f[2]); \
	      __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
			       T##_f[3], T##_f[2], T##_f[1]); \
	      R##_f[1] += q; \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  q >>= 1; \
	} \
      q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q != _FP_WORK_ROUND) \
	{ \
	  T##_f[0] = S##_f[0] + q; \
	  T##_f[1] = S##_f[1]; \
	  T##_f[2] = S##_f[2]; \
	  T##_f[3] = S##_f[3]; \
	  if (_FP_FRAC_GE_4 (X, T)) \
	    { \
	      S##_f[0] = T##_f[0] + q; \
	      S##_f[1] += (T##_f[0] > S##_f[0]); \
	      S##_f[2] += (T##_f[1] > S##_f[1]); \
	      S##_f[3] += (T##_f[2] > S##_f[2]); \
	      _FP_FRAC_DEC_4 (X, T); \
	      R##_f[0] += q; \
	    } \
	  _FP_FRAC_SLL_4 (X, 1); \
	  q >>= 1; \
	} \
      if (!_FP_FRAC_ZEROP_4 (X)) \
	{ \
	  if (_FP_FRAC_GT_4 (X, S)) \
	    R##_f[0] |= _FP_WORK_ROUND; \
	  R##_f[0] |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)


/* Store the four words I3 (most significant) .. I0 (least
   significant) into the fraction X.  */
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)

/* Three-word addition R2:R1:R0 = X2:X1:X0 + Y2:Y1:Y0 with carry
   propagation; a carry out of the top word is discarded.  Carries are
   detected by comparing each sum with the X word, so the result may
   alias Y but must not alias X.  A port may pre-define this macro to
   supply its own version.  */
#ifndef __FP_FRAC_ADD_3
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_3_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_3_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_3_c1; \
      /* Adding the incoming carry can itself wrap the word.  */ \
      __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
      r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
    } \
  while (0)
#endif

/* Four-word addition R3..R0 = X3..X0 + Y3..Y0 with carry propagation;
   a carry out of the top word is discarded.  Carries are detected by
   comparing each sum with the X word, so the result may alias Y but
   must not alias X.  A port may pre-define this macro to supply its
   own version.  */
#ifndef __FP_FRAC_ADD_4
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_4_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_4_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_4_c1; \
      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
      r2 = x2 + y2; \
      __FP_FRAC_ADD_4_c3 = r2 < x2; \
      r2 += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif

/* Three-word subtraction R2:R1:R0 = X2:X1:X0 - Y2:Y1:Y0 with borrow
   propagation; a borrow out of the top word is discarded.  The low
   words are built in temporaries and stored only at the end, and the
   second borrow is derived from the difference itself rather than by
   re-reading Y, so the result may alias either operand.  (The earlier
   `(y1 == x1)' test read Y after R had been written and gave a wrong
   borrow when R aliased Y.)  A port may pre-define this macro to
   supply its own version.  */
#ifndef __FP_FRAC_SUB_3
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_t0, __FP_FRAC_SUB_3_t1; \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      __FP_FRAC_SUB_3_t0 = (x0) - (y0); \
      __FP_FRAC_SUB_3_c1 = __FP_FRAC_SUB_3_t0 > (x0); \
      __FP_FRAC_SUB_3_t1 = (x1) - (y1); \
      __FP_FRAC_SUB_3_c2 = __FP_FRAC_SUB_3_t1 > (x1); \
      /* Taking the incoming borrow wraps only when the word is 0.  */ \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_t1 < __FP_FRAC_SUB_3_c1; \
      __FP_FRAC_SUB_3_t1 -= __FP_FRAC_SUB_3_c1; \
      r2 = (x2) - (y2) - __FP_FRAC_SUB_3_c2; \
      r1 = __FP_FRAC_SUB_3_t1; \
      r0 = __FP_FRAC_SUB_3_t0; \
    } \
  while (0)
#endif

/* Four-word subtraction R3..R0 = X3..X0 - Y3..Y0 with borrow
   propagation; a borrow out of the top word is discarded.  The low
   words are built in temporaries and stored only at the end, and each
   propagated borrow is derived from the difference itself rather than
   by re-reading Y, so the result may alias either operand -- as in
   _FP_FRAC_SUB_4 (X, Y, X).  (The earlier `(y1 == x1)' / `(y2 == x2)'
   tests read Y after R had been written and gave a wrong borrow when
   R aliased Y.)  A port may pre-define this macro to supply its own
   version.  */
#ifndef __FP_FRAC_SUB_4
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_t0, __FP_FRAC_SUB_4_t1; \
      _FP_W_TYPE __FP_FRAC_SUB_4_t2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      __FP_FRAC_SUB_4_t0 = (x0) - (y0); \
      __FP_FRAC_SUB_4_c1 = __FP_FRAC_SUB_4_t0 > (x0); \
      __FP_FRAC_SUB_4_t1 = (x1) - (y1); \
      __FP_FRAC_SUB_4_c2 = __FP_FRAC_SUB_4_t1 > (x1); \
      /* Taking the incoming borrow wraps only when the word is 0.  */ \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_t1 < __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_t1 -= __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_t2 = (x2) - (y2); \
      __FP_FRAC_SUB_4_c3 = __FP_FRAC_SUB_4_t2 > (x2); \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_t2 < __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_t2 -= __FP_FRAC_SUB_4_c2; \
      r3 = (x3) - (y3) - __FP_FRAC_SUB_4_c3; \
      r2 = __FP_FRAC_SUB_4_t2; \
      r1 = __FP_FRAC_SUB_4_t1; \
      r0 = __FP_FRAC_SUB_4_t0; \
    } \
  while (0)
#endif

/* Three-word in-place subtraction X2:X1:X0 -= Y2:Y1:Y0.  X is copied
   to temporaries first so that __FP_FRAC_SUB_3 never sees its result
   words aliasing its minuend.  A port may pre-define this macro to
   supply its own version.  */
#ifndef __FP_FRAC_DEC_3
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
		       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
		       y2, y1, y0); \
    } \
  while (0)
#endif

/* Four-word in-place subtraction X3..X0 -= Y3..Y0.  X is copied to
   temporaries first so that __FP_FRAC_SUB_4 never sees its result
   words aliasing its minuend.  A port may pre-define this macro to
   supply its own version.  */
#ifndef __FP_FRAC_DEC_4
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
		       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
		       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif

/* Add the single word I to the four-word number X3..X0 in place,
   rippling the carry upward; a carry out of X3 is discarded.  I is
   evaluated twice, so it must not have side effects.  A port may
   pre-define this macro to supply its own version.  */
#ifndef __FP_FRAC_ADDI_4
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x0 += (i)) < (i)); \
      x1 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
      x2 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
      x3 += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif

/* Convert FP values between word sizes.  This appears to be more
   complicated than I'd have expected it to be, so these might be
   wrong...  These macros are in any case somewhat bogus because they
   use information about what various FRAC_n variables look like
   internally [eg, that 2 word vars are X_f0 and X_f1].  But so do
   the ones in op-2.h and op-1.h.  */
/* Copy the lowest word of the four-word fraction S into the one-word
   fraction D (stored as D_f).  Higher words of S are dropped.  */
#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])

/* Copy the two lowest words of the four-word fraction S into the
   two-word fraction D (stored as D_f0 and D_f1).  Higher words of S
   are dropped.  */
#define _FP_FRAC_COPY_2_4(D, S) \
  do \
    { \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } \
  while (0)

/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */
/* Put the FP value X into r, which is an integer of size rsize.  */
/* The words of X are combined most significant first.  Each shift is
   wrapped in a `rsize <= _FP_W_TYPE_SIZE ? 0 : ...' guard so that a
   branch not taken for a narrow R still does not contain an unguarded
   shift by the full width of R.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
  do \
    { \
      if (rsize <= _FP_W_TYPE_SIZE) \
	r = X##_f[0]; \
      else if (rsize <= 2*_FP_W_TYPE_SIZE) \
	{ \
	  r = X##_f[1]; \
	  r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
	  r += X##_f[0]; \
	} \
      else \
	{ \
	  /* I'm feeling lazy so we deal with int == 3words \
	     (implausible) and int == 4words as a single case.  */ \
	  r = X##_f[3]; \
	  r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
	  r += X##_f[2]; \
	  r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
	  r += X##_f[1]; \
	  r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
	  r += X##_f[0]; \
	} \
    } \
  while (0)

/* "No disassemble Number Five!" */
/* Move an integer of size rsize into X's fractional part.  We rely on
   the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
   having to mask the values we store into it.  */
/* Words of X that lie beyond RSIZE bits are set to zero; the
   `rsize <= ...' guards also keep each shift count below the width
   of R.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
  do \
    { \
      X##_f[0] = r; \
      X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
      X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
    } \
  while (0)

/* Widen the one-word fraction S (stored as S_f) into the four-word
   fraction D; the three upper words are cleared.  */
#define _FP_FRAC_COPY_4_1(D, S) \
  do \
    { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)

/* Widen the two-word fraction S (stored as S_f0 and S_f1) into the
   four-word fraction D; the two upper words are cleared.  */
#define _FP_FRAC_COPY_4_2(D, S) \
  do \
    { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)

/* Same-size copy, provided so the COPY_<dst>_<src> family is
   complete.  */
#define _FP_FRAC_COPY_4_4(D, S) _FP_FRAC_COPY_4 (D, S)