1 /* mpfr_fma -- Floating multiply-add
3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #include "mpfr-impl.h"
25 /* The fused-multiply-add (fma) of x, y and z is defined by:
30 mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
35 MPFR_SAVE_EXPO_DECL (expo);
36 MPFR_GROUP_DECL(group);
38 /* particular cases */
39 if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
40 MPFR_IS_SINGULAR(y) ||
41 MPFR_IS_SINGULAR(z) ))
43 if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
48 /* now neither x, y or z is NaN */
49 else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
51 /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
52 if ((MPFR_IS_ZERO(y)) ||
55 ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
60 else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
63 MPFR_SET_SAME_SIGN(s, z);
66 else /* z is finite */
69 MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
73 /* now x and y are finite */
74 else if (MPFR_IS_INF(z))
77 MPFR_SET_SAME_SIGN(s, z);
80 else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
85 sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
86 MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
87 ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
89 ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
95 return mpfr_set (s, z, rnd_mode);
97 else /* necessarily z is zero here */
99 MPFR_ASSERTD(MPFR_IS_ZERO(z));
100 return mpfr_mul (s, x, y, rnd_mode);
104 /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
105 is exact, except in case of overflow or underflow. */
106 MPFR_SAVE_EXPO_MARK (expo);
107 MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
109 if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
111 /* overflow or underflow - this case is regarded as rare, thus
112 does not need to be very efficient (even if some tests below
113 could have been done earlier).
114 It is an overflow iff u is an infinity (since MPFR_RNDN was used).
115 Alternatively, we could test the overflow flag, but in this case,
116 mpfr_clear_flags would have been necessary. */
117 if (MPFR_IS_INF (u)) /* overflow */
119 /* Let's eliminate the obvious case where x*y and z have the
120 same sign. No possible cancellation -> real overflow.
121 Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
122 then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
123 is also an overflow. */
124 if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
125 MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
127 MPFR_GROUP_CLEAR (group);
128 MPFR_SAVE_EXPO_FREE (expo);
129 return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
132 /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
133 (x/4)*y does not overflow (let's recall that the result
134 is exact with an unbounded exponent range). It does not
135 underflow either, because x*y overflows and the exponent
136 range is large enough. */
137 inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
138 MPFR_ASSERTN (inexact == 0);
139 inexact = mpfr_mul (u, u, y, MPFR_RNDN);
140 MPFR_ASSERTN (inexact == 0);
142 /* Now, we need to add z/4... But it may underflow! */
146 MPFR_BLOCK_DECL (flags);
148 if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
149 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
151 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
156 mpfr_init2 (zo4, MPFR_PREC (z));
157 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
159 /* The division by 4 underflowed! */
160 MPFR_ASSERTN (0); /* TODO... */
165 /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
166 following addition would give the same result). */
167 MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
168 /* u and zz have different signs, so that an overflow
169 is not possible. But an underflow is theoretically
171 if (MPFR_UNDERFLOW (flags))
173 MPFR_ASSERTN (zz != z);
174 MPFR_ASSERTN (0); /* TODO... */
175 mpfr_clears (zo4, u, (mpfr_ptr) 0);
183 MPFR_GROUP_CLEAR (group);
184 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
185 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
186 if (inex2) /* overflow */
189 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
195 else /* underflow: one has |xy| < 2^(emin-1). */
197 unsigned long scale = 0;
204 /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
205 (the + 1 on MPFR_PREC (s) is necessary because the exponent
206 of the result can be EXP(z) - 1). */
207 diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
208 pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
213 MPFR_BLOCK_DECL (flags);
215 uscale = (mpfr_uexp_t) pzs - diffexp + 1;
216 MPFR_ASSERTN (uscale > 0);
217 MPFR_ASSERTN (uscale <= ULONG_MAX);
219 mpfr_init2 (scaled_z, MPFR_PREC (z));
220 inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
221 MPFR_ASSERTN (inexact == 0); /* TODO: overflow case */
223 /* Now we need to recompute u = xy * 2^scale. */
225 if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
227 mpfr_init2 (scaled_v, MPFR_PREC (x));
228 mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
229 mpfr_mul (u, scaled_v, y, MPFR_RNDN);
233 mpfr_init2 (scaled_v, MPFR_PREC (y));
234 mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
235 mpfr_mul (u, x, scaled_v, MPFR_RNDN);
237 mpfr_clear (scaled_v);
238 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
239 xy_underflows = MPFR_UNDERFLOW (flags);
249 /* Let's replace xy by sign(xy) * 2^(emin-1). */
250 MPFR_PREC (u) = MPFR_PREC_MIN;
251 mpfr_setmin (u, __gmpfr_emin);
252 MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
257 MPFR_BLOCK_DECL (flags);
259 MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
260 MPFR_GROUP_CLEAR (group);
265 mpfr_clear (scaled_z);
266 /* Here an overflow is theoretically possible, in which case
267 the result may be wrong, hence the assert. An underflow
268 is not possible, but let's check that anyway. */
269 MPFR_ASSERTN (! MPFR_OVERFLOW (flags)); /* TODO... */
270 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags)); /* not possible */
271 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
272 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
273 Also, handle the double rounding case:
274 s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
275 if (inex2) /* underflow */
280 /* FIXME/TODO: I'm not sure that the following is correct.
281 Check for possible spurious exceptions due to intermediate
283 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
288 inexact = mpfr_add (s, u, z, rnd_mode);
289 MPFR_GROUP_CLEAR (group);
290 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
292 MPFR_SAVE_EXPO_FREE (expo);
293 return mpfr_check_range (s, inexact, rnd_mode);