1 /* Software floating-point emulation.
2 Basic four-word fraction declaration and manipulation.
3 Copyright (C) 1997-2014 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
/* Declare the storage of a four-word fraction named X: an array X_f of
   four _FP_W_TYPE words.  */
#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
/* Copy all four words of fraction S into fraction D.  The expansion is a
   single comma expression, so it can be used wherever an expression is
   allowed.  */
#define _FP_FRAC_COPY_4(D, S) \
  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
/* Set fraction X from I, where I is a macro expanding to four
   comma-separated words, most significant first (such as
   _FP_ZEROFRAC_4).  I is expanded before __FP_FRAC_SET_4 is rescanned,
   which is how it becomes four separate arguments.  */
#define _FP_FRAC_SET_4(X, I) __FP_FRAC_SET_4 (X, I)
/* Word accessors for a four-word fraction.  Each expands to an lvalue:
   HIGH is word 3, LOW is word 0, WORD selects word w.  */
#define _FP_FRAC_HIGH_4(X) (X##_f[3])
#define _FP_FRAC_LOW_4(X) (X##_f[0])
#define _FP_FRAC_WORD_4(X, w) (X##_f[w])
/* Shift the four-word fraction X left by N bits, in place.
   N / _FP_W_TYPE_SIZE whole words are skipped and the remaining
   N % _FP_W_TYPE_SIZE bits are carried between adjacent words; the
   vacated low words are cleared.  N is evaluated more than once.  */
#define _FP_FRAC_SLL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
      /* A whole-word shift is handled separately: with up == 0, \
         down equals the word size and must not be used as a \
         shift count.  */ \
      if (!_FP_FRAC_SLL_4_up) \
        for (_FP_FRAC_SLL_4_i = 3; \
             _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
             --_FP_FRAC_SLL_4_i) \
          X##_f[_FP_FRAC_SLL_4_i] \
            = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SLL_4_i = 3; \
               _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
               --_FP_FRAC_SLL_4_i) \
            X##_f[_FP_FRAC_SLL_4_i] \
              = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
                  << _FP_FRAC_SLL_4_up) \
                 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
                    >> _FP_FRAC_SLL_4_down)); \
          /* The lowest surviving word has nothing below it.  */ \
          X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
        } \
      /* Clear the words that were shifted out from under.  */ \
      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
        X##_f[_FP_FRAC_SLL_4_i] = 0; \
    } \
  while (0)
/* This one was broken too.  */
/* Shift the four-word fraction X right by N bits, in place (logical
   shift: bits falling off the low end are discarded and the vacated
   high words are cleared).  N is evaluated more than once.  */
#define _FP_FRAC_SRL_4(X, N) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
      /* A whole-word shift is handled separately: with down == 0, \
         up equals the word size and must not be used as a shift \
         count.  */ \
      if (!_FP_FRAC_SRL_4_down) \
        for (_FP_FRAC_SRL_4_i = 0; \
             _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
             ++_FP_FRAC_SRL_4_i) \
          X##_f[_FP_FRAC_SRL_4_i] \
            = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
      else \
        { \
          for (_FP_FRAC_SRL_4_i = 0; \
               _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
               ++_FP_FRAC_SRL_4_i) \
            X##_f[_FP_FRAC_SRL_4_i] \
              = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
                  >> _FP_FRAC_SRL_4_down) \
                 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
                    << _FP_FRAC_SRL_4_up)); \
          /* The highest surviving word has nothing above it.  */ \
          X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
        } \
      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
        X##_f[_FP_FRAC_SRL_4_i] = 0; \
    } \
  while (0)
/* Right shift with sticky-lsb.
   What this actually means is that we do a standard right-shift,
   but that if any of the bits that fall off the right hand side
   were one then we always set the LSbit.

   This variant performs only the shift on X and stores into S whether
   any shifted-out bit was one (1) or not (0); it does not itself modify
   the LSbit.  The size argument is not used.  N is evaluated more than
   once.  */
#define _FP_FRAC_SRST_4(X, S, N, size) \
  do \
    { \
      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
      _FP_W_TYPE _FP_FRAC_SRST_4_s; \
      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
      /* Collect the words that are shifted out entirely.  */ \
      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
           _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
           ++_FP_FRAC_SRST_4_i) \
        _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
      if (!_FP_FRAC_SRST_4_down) \
        for (_FP_FRAC_SRST_4_i = 0; \
             _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
             ++_FP_FRAC_SRST_4_i) \
          X##_f[_FP_FRAC_SRST_4_i] \
            = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
      else \
        { \
          /* Here the index equals skip: add the low bits of the \
             first surviving word that are about to be lost.  */ \
          _FP_FRAC_SRST_4_s \
            |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
          for (_FP_FRAC_SRST_4_i = 0; \
               _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
               ++_FP_FRAC_SRST_4_i) \
            X##_f[_FP_FRAC_SRST_4_i] \
              = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
                  >> _FP_FRAC_SRST_4_down) \
                 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
                    << _FP_FRAC_SRST_4_up)); \
          X##_f[_FP_FRAC_SRST_4_i++] \
            = X##_f[3] >> _FP_FRAC_SRST_4_down; \
        } \
      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
        X##_f[_FP_FRAC_SRST_4_i] = 0; \
      S = (_FP_FRAC_SRST_4_s != 0); \
    } \
  while (0)
/* Shift X right by N bits with a sticky least significant bit: the
   shift is done by _FP_FRAC_SRST_4 and the sticky flag it reports is
   then ORed into word 0 of X.  */
#define _FP_FRAC_SRS_4(X, N, size) \
  do \
    { \
      int _FP_FRAC_SRS_4_sticky; \
      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size)); \
      X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
    } \
  while (0)
/* Fraction-level arithmetic.  Each macro simply spreads the named
   fractions into their four words (most significant first) and forwards
   to the word-level __FP_FRAC_*_4 primitive.  */

/* R = X + Y.  */
#define _FP_FRAC_ADD_4(R, X, Y) \
  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* R = X - Y.  */
#define _FP_FRAC_SUB_4(R, X, Y) \
  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
                   X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X -= Y.  */
#define _FP_FRAC_DEC_4(X, Y) \
  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])

/* X += I, where I is a single word.  */
#define _FP_FRAC_ADDI_4(X, I) \
  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
/* Four-word fraction constants, written as four comma-separated words
   so that they can be handed to _FP_FRAC_SET_4.  */
#define _FP_ZEROFRAC_4 0, 0, 0, 0
#define _FP_MINFRAC_4 0, 0, 0, 1
#define _FP_MAXFRAC_4 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
/* Predicates on a four-word fraction.  */

/* Nonzero iff all four words of X are zero.  */
#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
/* Nonzero iff the top word of X, viewed as _FP_WS_TYPE, is negative.  */
#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE) X##_f[3] < 0)
/* Test / clear the format-specific overflow bit(s) in the word that
   _FP_FRAC_HIGH_<fs> selects.  */
#define _FP_FRAC_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_4(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
#define _FP_FRAC_CLEAR_OVERP_4(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
/* Comparisons of two four-word fractions.  EQ compares word by word;
   GT and GE compare lexicographically from the most significant word
   (index 3) down to the least significant (index 0).  */
#define _FP_FRAC_EQ_4(X, Y) \
  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])

#define _FP_FRAC_GT_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
           || (X##_f[2] == Y##_f[2] \
               && (X##_f[1] > Y##_f[1] \
                   || (X##_f[1] == Y##_f[1] \
                       && X##_f[0] > Y##_f[0]))))))

#define _FP_FRAC_GE_4(X, Y) \
  (X##_f[3] > Y##_f[3] \
   || (X##_f[3] == Y##_f[3] \
       && (X##_f[2] > Y##_f[2] \
           || (X##_f[2] == Y##_f[2] \
               && (X##_f[1] > Y##_f[1] \
                   || (X##_f[1] == Y##_f[1] \
                       && X##_f[0] >= Y##_f[0]))))))
/* Store in R the number of leading zero bits of the four-word fraction
   X.  The most significant nonzero word is handed to __FP_CLZ and
   _FP_W_TYPE_SIZE is added for every all-zero word above it.  If words
   3..1 are all zero, word 0 is passed to __FP_CLZ unconditionally, so
   the result for an all-zero X depends on what __FP_CLZ does with
   zero.  */
#define _FP_FRAC_CLZ_4(R, X) \
  do \
    { \
      if (X##_f[3]) \
        __FP_CLZ ((R), X##_f[3]); \
      else if (X##_f[2]) \
        { \
          __FP_CLZ ((R), X##_f[2]); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
      else if (X##_f[1]) \
        { \
          __FP_CLZ ((R), X##_f[1]); \
          (R) += _FP_W_TYPE_SIZE*2; \
        } \
      else \
        { \
          __FP_CLZ ((R), X##_f[0]); \
          (R) += _FP_W_TYPE_SIZE*3; \
        } \
    } \
  while (0)
/* Split the floating-point value val of format fs into X's raw fields:
   val is stored into the flt member of a local union _FP_UNION_<fs>
   and the bits.frac0..frac3, bits.exp and bits.sign members are read
   back into X_f[0..3], X_e and X_s.  */
#define _FP_UNPACK_RAW_4(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
      _FP_UNPACK_RAW_4_flo.flt = (val); \
      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
    } \
  while (0)
/* Like _FP_UNPACK_RAW_4, but val is a pointer: it is cast to
   union _FP_UNION_<fs> * and the fields are read through it without
   copying the value.  */
#define _FP_UNPACK_RAW_4_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
      X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
    } \
  while (0)
/* Assemble X's raw fields into a floating-point value of format fs:
   X_f[0..3], X_e and X_s are stored into the bits members of a local
   union _FP_UNION_<fs>, whose flt member is then assigned to val.  */
#define _FP_PACK_RAW_4(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
      (val) = _FP_PACK_RAW_4_flo.flt; \
    } \
  while (0)
/* Like _FP_PACK_RAW_4, but val is a pointer: it is cast to
   union _FP_UNION_<fs> * and X's raw fields are written through it.  */
#define _FP_PACK_RAW_4_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
      _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
    } \
  while (0)
/* Multiplication algorithms: */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.

   R (an eight-word fraction) receives the full product of the four-word
   fractions X and Y.  doit (hi, lo, a, b) must store the double-word
   product of the words a and b.  The sixteen partial products are
   summed column by column with __FP_FRAC_ADD_3, from the least
   significant result word upwards; the final X_f[3] * Y_f[3] term
   cannot carry out of the result and is added with __FP_FRAC_ADD_2.
   wfracbits is not used here.  */

#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
      \
      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
            X##_f[0], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[0], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[0], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[2], Y##_f[0]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, 0, _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
                       _FP_FRAC_WORD_8 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
                       _FP_FRAC_WORD_8 (R, 2)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
            _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
            _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[1], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[1]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
                       _FP_FRAC_WORD_8 (R, 3)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[2], Y##_f[2]); \
      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
            X##_f[1], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
            X##_f[3], Y##_f[1]); \
      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
            X##_f[2], Y##_f[3]); \
      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
            X##_f[3], Y##_f[2]); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_c_f1, \
                       _FP_MUL_MEAT_DW_4_wide_c_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4), 0, \
                       _FP_MUL_MEAT_DW_4_wide_d_f1, \
                       _FP_MUL_MEAT_DW_4_wide_d_f0, \
                       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
                       _FP_FRAC_WORD_8 (R, 4)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_e_f1, \
                       _FP_MUL_MEAT_DW_4_wide_e_f0, \
                       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5), 0, \
                       _FP_MUL_MEAT_DW_4_wide_f_f1, \
                       _FP_MUL_MEAT_DW_4_wide_f_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_FRAC_WORD_8 (R, 5)); \
      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
            X##_f[3], Y##_f[3]); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
                       _FP_MUL_MEAT_DW_4_wide_b_f1, \
                       _FP_MUL_MEAT_DW_4_wide_b_f0, \
                       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
    } \
  while (0)
/* R = X * Y for four-word fractions: compute the eight-word product
   with _FP_MUL_MEAT_DW_4_wide, shift it right (with sticky bit) by
   wfracbits - 1 and keep the low four words.  */
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
      \
      _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z, \
                              X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
    } \
  while (0)
/* Eight-word product R = X * Y via GMP's mpn_mul_n on the two four-limb
   operands.  The operands are the word arrays of the X and Y macro
   parameters (the previous text named the unrelated identifiers _x_f
   and _y_f, ignoring its own parameters).  wfracbits is not used.  */
#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      mpn_mul_n (R##_f, X##_f, Y##_f, 4); \
    } \
  while (0)
/* R = X * Y for four-word fractions using the GMP-based double-width
   multiply: compute the eight-word product, shift it right (with sticky
   bit) by wfracbits - 1 and keep the low four words.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
      \
      _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
                       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
    } \
  while (0)
/* Helper utility for _FP_DIV_MEAT_4_udiv:
   pppp = m * nnn, i.e. multiply the three-word number (n2, n1, n0) by
   the single word m and store the four-word product in
   (p3, p2, p1, p0).  Built from three umul_ppmm word multiplies whose
   low halves are added into the running result with
   __FP_FRAC_ADDI_2.  */
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
  do \
    { \
      UWtype umul_ppppmnnn_t; \
      umul_ppmm (p1, p0, m, n0); \
      umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
      umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
    } \
  while (0)
/* Division algorithms: */

/* Schoolbook division of four-word fractions, one quotient word per
   iteration (from R_f[3] down to R_f[0]), using udiv_qrnnd for the word
   estimate and umul_ppppmnnn to multiply it back.  X and Y are both
   modified.  If a remainder is left after the last word, the sticky
   bit is ORed into R_f[0].
   NOTE(review): _FP_FRAC_SUB_4 (X, Y, X) and _FP_FRAC_ADD_4 (X, Y, X)
   alias the result with an operand; confirm that the word-level
   primitives in use tolerate that before relying on this macro.  */
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
  do \
    { \
      int _FP_DIV_MEAT_4_udiv_i; \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
      if (_FP_FRAC_GE_4 (X, Y)) \
        { \
          /* Halve X, keeping the bit shifted out in n.  */ \
          _FP_DIV_MEAT_4_udiv_n_f[3] \
            = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
          _FP_FRAC_SRL_4 (X, 1); \
        } \
      else \
        R##_e--; \
      \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
      \
      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
        { \
          if (X##_f[3] == Y##_f[3]) \
            { \
              /* This is a special case, not an optimization \
                 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
                 As X## is guaranteed to be < Y, \
                 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
                 (UWtype)-1 or (UWtype)-2.  */ \
              R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                break; \
              __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
                               Y##_f[2], Y##_f[1], Y##_f[0], 0, \
                               X##_f[2], X##_f[1], X##_f[0], \
                               _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
              _FP_FRAC_SUB_4 (X, Y, X); \
              if (X##_f[3] > Y##_f[3]) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                } \
            } \
          else \
            { \
              udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
                          X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
              umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
                             _FP_DIV_MEAT_4_udiv_m_f[2], \
                             _FP_DIV_MEAT_4_udiv_m_f[1], \
                             _FP_DIV_MEAT_4_udiv_m_f[0], \
                             R##_f[_FP_DIV_MEAT_4_udiv_i], \
                             Y##_f[2], Y##_f[1], Y##_f[0]); \
              X##_f[2] = X##_f[1]; \
              X##_f[1] = X##_f[0]; \
              X##_f[0] \
                = _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
              /* The word estimate may be up to two too large; \
                 correct it while the product exceeds X.  */ \
              if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                { \
                  R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                  _FP_FRAC_ADD_4 (X, Y, X); \
                  if (_FP_FRAC_GE_4 (X, Y) \
                      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
                    { \
                      R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
                      _FP_FRAC_ADD_4 (X, Y, X); \
                    } \
                } \
              _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
              if (!_FP_DIV_MEAT_4_udiv_i) \
                { \
                  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
                    R##_f[0] |= _FP_WORK_STICKY; \
                  break; \
                } \
            } \
        } \
    } \
  while (0)
/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root of the four-word fraction X.  q is the current
   trial bit (an lvalue, modified); the result bits are accumulated in R
   while S and T hold the running and trial values, and X holds the
   remainder, shifted left by one bit per step.  The words are processed
   from index 3 down to index 0; the last loop stops when q reaches
   _FP_WORK_ROUND, after which a nonzero remainder sets the round and/or
   sticky bits in R_f[0].
   NOTE(review): the caller is presumably expected to have initialised
   R, S and q; confirm against the users of this macro.  */
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f[3] = S##_f[3] + (q); \
          if (T##_f[3] <= X##_f[3]) \
            { \
              S##_f[3] = T##_f[3] + (q); \
              X##_f[3] -= T##_f[3]; \
              R##_f[3] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[2] = S##_f[2] + (q); \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
            { \
              S##_f[2] = T##_f[2] + (q); \
              /* Propagate the carry out of word 2.  */ \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
                               T##_f[3], T##_f[2]); \
              R##_f[2] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while (q) \
        { \
          T##_f[1] = S##_f[1] + (q); \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (T##_f[3] < X##_f[3] \
              || (T##_f[3] == X##_f[3] \
                  && (T##_f[2] < X##_f[2] \
                      || (T##_f[2] == X##_f[2] \
                          && T##_f[1] <= X##_f[1])))) \
            { \
              S##_f[1] = T##_f[1] + (q); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
                               T##_f[3], T##_f[2], T##_f[1]); \
              R##_f[1] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f[0] = S##_f[0] + (q); \
          T##_f[1] = S##_f[1]; \
          T##_f[2] = S##_f[2]; \
          T##_f[3] = S##_f[3]; \
          if (_FP_FRAC_GE_4 (X, T)) \
            { \
              S##_f[0] = T##_f[0] + (q); \
              S##_f[1] += (T##_f[0] > S##_f[0]); \
              S##_f[2] += (T##_f[1] > S##_f[1]); \
              S##_f[3] += (T##_f[2] > S##_f[2]); \
              _FP_FRAC_DEC_4 (X, T); \
              R##_f[0] += (q); \
            } \
          _FP_FRAC_SLL_4 (X, 1); \
          (q) >>= 1; \
        } \
      if (!_FP_FRAC_ZEROP_4 (X)) \
        { \
          if (_FP_FRAC_GT_4 (X, S)) \
            R##_f[0] |= _FP_WORK_ROUND; \
          R##_f[0] |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
/* Internals.  */

/* Store the four words I3 (most significant) .. I0 (least significant)
   into fraction X.  Expands to a single comma expression.  */
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
/* (r2, r1, r0) = (x2, x1, x0) + (y2, y1, y0), three-word addition with
   carry propagation; a carry out of the top word is dropped.  Only
   defined if no machine-specific version was provided earlier.  Each
   result word is written before the carry out of it is computed by
   comparing it with the matching x word.  */
#ifndef __FP_FRAC_ADD_3
# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_3_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_3_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_3_c1; \
      /* Adding the incoming carry may itself wrap.  */ \
      __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
      r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
    } \
  while (0)
#endif
/* (r3, r2, r1, r0) = (x3, x2, x1, x0) + (y3, y2, y1, y0), four-word
   addition with carry propagation; a carry out of the top word is
   dropped.  Only defined if no machine-specific version was provided
   earlier.  */
#ifndef __FP_FRAC_ADD_4
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
      _FP_W_TYPE __FP_FRAC_ADD_4_c3; \
      r0 = x0 + y0; \
      __FP_FRAC_ADD_4_c1 = r0 < x0; \
      r1 = x1 + y1; \
      __FP_FRAC_ADD_4_c2 = r1 < x1; \
      r1 += __FP_FRAC_ADD_4_c1; \
      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
      r2 = x2 + y2; \
      __FP_FRAC_ADD_4_c3 = r2 < x2; \
      r2 += __FP_FRAC_ADD_4_c2; \
      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
    } \
  while (0)
#endif
/* (r2, r1, r0) = (x2, x1, x0) - (y2, y1, y0), three-word subtraction
   with borrow propagation; a borrow out of the top word is dropped.
   Only defined if no machine-specific version was provided earlier.
   Note that y1 and x1 are read again after r1 has been written.  */
#ifndef __FP_FRAC_SUB_3
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
      r0 = x0 - y0; \
      __FP_FRAC_SUB_3_c1 = r0 > x0; \
      r1 = x1 - y1; \
      __FP_FRAC_SUB_3_c2 = r1 > x1; \
      r1 -= __FP_FRAC_SUB_3_c1; \
      /* The incoming borrow propagates when x1 - y1 was zero.  */ \
      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \
      r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \
    } \
  while (0)
#endif
/* (r3, r2, r1, r0) = (x3, x2, x1, x0) - (y3, y2, y1, y0), four-word
   subtraction with borrow propagation; a borrow out of the top word is
   dropped.  Only defined if no machine-specific version was provided
   earlier.  Note that y1/x1 and y2/x2 are read again after r1 and r2
   have been written.  */
#ifndef __FP_FRAC_SUB_4
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
      _FP_W_TYPE __FP_FRAC_SUB_4_c3; \
      r0 = x0 - y0; \
      __FP_FRAC_SUB_4_c1 = r0 > x0; \
      r1 = x1 - y1; \
      __FP_FRAC_SUB_4_c2 = r1 > x1; \
      r1 -= __FP_FRAC_SUB_4_c1; \
      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \
      r2 = x2 - y2; \
      __FP_FRAC_SUB_4_c3 = r2 > x2; \
      r2 -= __FP_FRAC_SUB_4_c2; \
      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \
      r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \
    } \
  while (0)
#endif
/* (x2, x1, x0) -= (y2, y1, y0).  The minuend is first copied into
   temporaries so that __FP_FRAC_SUB_3 never sees its result words
   aliased with its x operands.  Only defined if no machine-specific
   version was provided earlier.  */
#ifndef __FP_FRAC_DEC_3
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
      UWtype __FP_FRAC_DEC_3_t2; \
      __FP_FRAC_DEC_3_t0 = x0; \
      __FP_FRAC_DEC_3_t1 = x1; \
      __FP_FRAC_DEC_3_t2 = x2; \
      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
                       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
                       y2, y1, y0); \
    } \
  while (0)
#endif
/* (x3, x2, x1, x0) -= (y3, y2, y1, y0).  The minuend is first copied
   into temporaries so that __FP_FRAC_SUB_4 never sees its result words
   aliased with its x operands.  Only defined if no machine-specific
   version was provided earlier.  */
#ifndef __FP_FRAC_DEC_4
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
  do \
    { \
      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
      __FP_FRAC_DEC_4_t0 = x0; \
      __FP_FRAC_DEC_4_t1 = x1; \
      __FP_FRAC_DEC_4_t2 = x2; \
      __FP_FRAC_DEC_4_t3 = x3; \
      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
                       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
                       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
    } \
  while (0)
#endif
/* (x3, x2, x1, x0) += i, where i is a single word; the carry ripples
   upwards one word at a time and a carry out of x3 is dropped.  i is
   evaluated twice.  Only defined if no machine-specific version was
   provided earlier.  */
#ifndef __FP_FRAC_ADDI_4
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
  do \
    { \
      UWtype __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \
      x1 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
      x2 += __FP_FRAC_ADDI_4_t; \
      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
      x3 += __FP_FRAC_ADDI_4_t; \
    } \
  while (0)
#endif
/* Convert FP values between word sizes.  This appears to be more
   complicated than I'd have expected it to be, so these might be
   wrong...  These macros are in any case somewhat bogus because they
   use information about what various FRAC_n variables look like
   internally [eg, that 2 word vars are X_f0 and X_f1].  But so do
   the ones in op-2.h and op-1.h.  */
/* Copy the least significant word of the four-word fraction S into the
   one-word fraction D (whose storage is the scalar D_f).  */
#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
/* Copy the two least significant words of the four-word fraction S
   into the two-word fraction D (whose words are the scalars D_f0 and
   D_f1).  */
#define _FP_FRAC_COPY_2_4(D, S) \
  do \
    { \
      D##_f0 = S##_f[0]; \
      D##_f1 = S##_f[1]; \
    } \
  while (0)

/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */
/* Put the FP value X into r, which is an integer of size rsize.
   Words are combined from the most significant one needed downwards.
   Every shift is written as a conditional on rsize so that a shift by
   the full word size is never evaluated when r is only one word
   wide.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
  do \
    { \
      if ((rsize) <= _FP_W_TYPE_SIZE) \
        (r) = X##_f[0]; \
      else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
        { \
          (r) = X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
      else \
        { \
          /* I'm feeling lazy so we deal with int == 3words \
             (implausible) and int == 4words as a single case.  */ \
          (r) = X##_f[3]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[2]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[1]; \
          (r) = ((rsize) <= _FP_W_TYPE_SIZE \
                 ? 0 \
                 : (r) << _FP_W_TYPE_SIZE); \
          (r) += X##_f[0]; \
        } \
    } \
  while (0)
/* "No disassemble Number Five!" */
/* Move an integer of size rsize into X's fractional part.  We rely on
   the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
   having to mask the values we store into it.  Words that lie beyond
   rsize are set to zero; the conditionals also keep r from ever being
   shifted by its full width or more.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
  do \
    { \
      X##_f[0] = (r); \
      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> _FP_W_TYPE_SIZE); \
      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 2*_FP_W_TYPE_SIZE); \
      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE \
                  ? 0 \
                  : (r) >> 3*_FP_W_TYPE_SIZE); \
    } \
  while (0)
/* Widen the one-word fraction S (scalar S_f) into the four-word
   fraction D: S becomes the least significant word and the upper three
   words are cleared.  */
#define _FP_FRAC_COPY_4_1(D, S) \
  do \
    { \
      D##_f[0] = S##_f; \
      D##_f[1] = D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
/* Widen the two-word fraction S (scalars S_f0 and S_f1) into the
   four-word fraction D: the two words of S become the low words and
   the upper two words are cleared.  */
#define _FP_FRAC_COPY_4_2(D, S) \
  do \
    { \
      D##_f[0] = S##_f0; \
      D##_f[1] = S##_f1; \
      D##_f[2] = D##_f[3] = 0; \
    } \
  while (0)
/* Same-size "conversion": a plain four-word copy of S into D.  */
#define _FP_FRAC_COPY_4_4(D, S) _FP_FRAC_COPY_4 (D, S)