fma.c

   1 /* mpfr_fma -- Floating multiply-add
   2
   3 Copyright 2001, 2002, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Cacao projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #include "mpfr-impl.h"
  24
  25 /* The fused-multiply-add (fma) of x, y and z is defined by:
  26    fma(x,y,z)= x*y + z
  27 */
  28
  29 int
  30 mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
  31           mpfr_rnd_t rnd_mode)
  32 {
  33   int inexact;
  34   mpfr_t u;
  35   MPFR_SAVE_EXPO_DECL (expo);
  36   MPFR_GROUP_DECL(group);
  37
  38   /* particular cases */
  39   if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) ||
  40                      MPFR_IS_SINGULAR(y) ||
  41                      MPFR_IS_SINGULAR(z) ))
  42     {
  43       if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
  44         {
  45           MPFR_SET_NAN(s);
  46           MPFR_RET_NAN;
  47         }
  48       /* now neither x, y or z is NaN */
  49       else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
  50         {
  51           /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
  52           if ((MPFR_IS_ZERO(y)) ||
  53               (MPFR_IS_ZERO(x)) ||
  54               (MPFR_IS_INF(z) &&
  55                ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
  56             {
  57               MPFR_SET_NAN(s);
  58               MPFR_RET_NAN;
  59             }
  60           else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
  61             {
  62               MPFR_SET_INF(s);
  63               MPFR_SET_SAME_SIGN(s, z);
  64               MPFR_RET(0);
  65             }
  66           else /* z is finite */
  67             {
  68               MPFR_SET_INF(s);
  69               MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x) , MPFR_SIGN(y)));
  70               MPFR_RET(0);
  71             }
  72         }
  73       /* now x and y are finite */
  74       else if (MPFR_IS_INF(z))
  75         {
  76           MPFR_SET_INF(s);
  77           MPFR_SET_SAME_SIGN(s, z);
  78           MPFR_RET(0);
  79         }
  80       else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
  81         {
  82           if (MPFR_IS_ZERO(z))
  83             {
  84               int sign_p;
  85               sign_p = MPFR_MULT_SIGN( MPFR_SIGN(x) , MPFR_SIGN(y) );
  86               MPFR_SET_SIGN(s,(rnd_mode != MPFR_RNDD ?
  87                                ((MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z))
  88                                 ? -1 : 1) :
  89                                ((MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z))
  90                                 ? 1 : -1)));
  91               MPFR_SET_ZERO(s);
  92               MPFR_RET(0);
  93             }
  94           else
  95             return mpfr_set (s, z, rnd_mode);
  96         }
  97       else /* necessarily z is zero here */
  98         {
  99           MPFR_ASSERTD(MPFR_IS_ZERO(z));
 100           return mpfr_mul (s, x, y, rnd_mode);
 101         }
 102     }
 103
 104   /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
 105      is exact, except in case of overflow or underflow. */
 106   MPFR_SAVE_EXPO_MARK (expo);
 107   MPFR_GROUP_INIT_1 (group, MPFR_PREC(x) + MPFR_PREC(y), u);
 108
 109   if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
 110     {
 111       /* overflow or underflow - this case is regarded as rare, thus
 112          does not need to be very efficient (even if some tests below
 113          could have been done earlier).
 114          It is an overflow iff u is an infinity (since MPFR_RNDN was used).
 115          Alternatively, we could test the overflow flag, but in this case,
 116          mpfr_clear_flags would have been necessary. */
 117       if (MPFR_IS_INF (u))  /* overflow */
 118         {
 119           /* Let's eliminate the obvious case where x*y and z have the
 120              same sign. No possible cancellation -> real overflow.
 121              Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
 122              then |x*y| >= 2^(emax+1), and |x*y + z| >= 2^emax. This case
 123              is also an overflow. */
 124           if (MPFR_SIGN (u) == MPFR_SIGN (z) ||
 125               MPFR_GET_EXP (x) + MPFR_GET_EXP (y) >= __gmpfr_emax + 3)
 126             {
 127               MPFR_GROUP_CLEAR (group);
 128               MPFR_SAVE_EXPO_FREE (expo);
 129               return mpfr_overflow (s, rnd_mode, MPFR_SIGN (z));
 130             }
 131
 132           /* E(x) + E(y) <= emax+2, therefore |x*y| < 2^(emax+2), and
 133              (x/4)*y does not overflow (let's recall that the result
 134              is exact with an unbounded exponent range). It does not
 135              underflow either, because x*y overflows and the exponent
 136              range is large enough. */
 137           inexact = mpfr_div_2ui (u, x, 2, MPFR_RNDN);
 138           MPFR_ASSERTN (inexact == 0);
 139           inexact = mpfr_mul (u, u, y, MPFR_RNDN);
 140           MPFR_ASSERTN (inexact == 0);
 141
 142           /* Now, we need to add z/4... But it may underflow! */
 143           {
 144             mpfr_t zo4;
 145             mpfr_srcptr zz;
 146             MPFR_BLOCK_DECL (flags);
 147
 148             if (MPFR_GET_EXP (u) > MPFR_GET_EXP (z) &&
 149                 MPFR_GET_EXP (u) - MPFR_GET_EXP (z) > MPFR_PREC (u))
 150               {
 151                 /* |z| < ulp(u)/2, therefore one can use z instead of z/4. */
 152                 zz = z;
 153               }
 154             else
 155               {
 156                 mpfr_init2 (zo4, MPFR_PREC (z));
 157                 if (mpfr_div_2ui (zo4, z, 2, MPFR_RNDZ))
 158                   {
 159                     /* The division by 4 underflowed! */
 160                     MPFR_ASSERTN (0); /* TODO... */
 161                   }
 162                 zz = zo4;
 163               }
 164
 165             /* Let's recall that u = x*y/4 and zz = z/4 (or z if the
 166                following addition would give the same result). */
 167             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, zz, rnd_mode));
 168             /* u and zz have different signs, so that an overflow
 169                is not possible. But an underflow is theoretically
 170                possible! */
 171             if (MPFR_UNDERFLOW (flags))
 172               {
 173                 MPFR_ASSERTN (zz != z);
 174                 MPFR_ASSERTN (0); /* TODO... */
 175                 mpfr_clears (zo4, u, (mpfr_ptr) 0);
 176               }
 177             else
 178               {
 179                 int inex2;
 180
 181                 if (zz != z)
 182                   mpfr_clear (zo4);
 183                 MPFR_GROUP_CLEAR (group);
 184                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 185                 inex2 = mpfr_mul_2ui (s, s, 2, rnd_mode);
 186                 if (inex2)  /* overflow */
 187                   {
 188                     inexact = inex2;
 189                     MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 190                   }
 191                 goto end;
 192               }
 193           }
 194         }
 195       else  /* underflow: one has |xy| < 2^(emin-1). */
 196         {
 197           unsigned long scale = 0;
 198           mpfr_t scaled_z;
 199           mpfr_srcptr new_z;
 200           mpfr_exp_t diffexp;
 201           mpfr_prec_t pzs;
 202           int xy_underflows;
 203
 204           /* Let's scale z so that ulp(z) > 2^emin and ulp(s) > 2^emin
 205              (the + 1 on MPFR_PREC (s) is necessary because the exponent
 206              of the result can be EXP(z) - 1). */
 207           diffexp = MPFR_GET_EXP (z) - __gmpfr_emin;
 208           pzs = MAX (MPFR_PREC (z), MPFR_PREC (s) + 1);
 209           if (diffexp <= pzs)
 210             {
 211               mpfr_uexp_t uscale;
 212               mpfr_t scaled_v;
 213               MPFR_BLOCK_DECL (flags);
 214
 215               uscale = (mpfr_uexp_t) pzs - diffexp + 1;
 216               MPFR_ASSERTN (uscale > 0);
 217               MPFR_ASSERTN (uscale <= ULONG_MAX);
 218               scale = uscale;
 219               mpfr_init2 (scaled_z, MPFR_PREC (z));
 220               inexact = mpfr_mul_2ui (scaled_z, z, scale, MPFR_RNDN);
 221               MPFR_ASSERTN (inexact == 0);  /* TODO: overflow case */
 222               new_z = scaled_z;
 223               /* Now we need to recompute u = xy * 2^scale. */
 224               MPFR_BLOCK (flags,
 225                           if (MPFR_GET_EXP (x) < MPFR_GET_EXP (y))
 226                             {
 227                               mpfr_init2 (scaled_v, MPFR_PREC (x));
 228                               mpfr_mul_2ui (scaled_v, x, scale, MPFR_RNDN);
 229                               mpfr_mul (u, scaled_v, y, MPFR_RNDN);
 230                             }
 231                           else
 232                             {
 233                               mpfr_init2 (scaled_v, MPFR_PREC (y));
 234                               mpfr_mul_2ui (scaled_v, y, scale, MPFR_RNDN);
 235                               mpfr_mul (u, x, scaled_v, MPFR_RNDN);
 236                             });
 237               mpfr_clear (scaled_v);
 238               MPFR_ASSERTN (! MPFR_OVERFLOW (flags));
 239               xy_underflows = MPFR_UNDERFLOW (flags);
 240             }
 241           else
 242             {
 243               new_z = z;
 244               xy_underflows = 1;
 245             }
 246
 247           if (xy_underflows)
 248             {
 249               /* Let's replace xy by sign(xy) * 2^(emin-1). */
 250               MPFR_PREC (u) = MPFR_PREC_MIN;
 251               mpfr_setmin (u, __gmpfr_emin);
 252               MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
 253                                                 MPFR_SIGN (y)));
 254             }
 255
 256           {
 257             MPFR_BLOCK_DECL (flags);
 258
 259             MPFR_BLOCK (flags, inexact = mpfr_add (s, u, new_z, rnd_mode));
 260             MPFR_GROUP_CLEAR (group);
 261             if (scale != 0)
 262               {
 263                 int inex2;
 264
 265                 mpfr_clear (scaled_z);
 266                 /* Here an overflow is theoretically possible, in which case
 267                    the result may be wrong, hence the assert. An underflow
 268                    is not possible, but let's check that anyway. */
 269                 MPFR_ASSERTN (! MPFR_OVERFLOW (flags));  /* TODO... */
 270                 MPFR_ASSERTN (! MPFR_UNDERFLOW (flags));  /* not possible */
 271                 inex2 = mpfr_div_2ui (s, s, scale, MPFR_RNDN);
 272                 /* FIXME: this seems incorrect. MPFR_RNDN -> rnd_mode?
 273                    Also, handle the double rounding case:
 274                    s / 2^scale = 2^(emin - 2) in MPFR_RNDN. */
 275                 if (inex2)  /* underflow */
 276                   inexact = inex2;
 277               }
 278           }
 279
 280           /* FIXME/TODO: I'm not sure that the following is correct.
 281              Check for possible spurious exceptions due to intermediate
 282              computations. */
 283           MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 284           goto end;
 285         }
 286     }
 287
 288   inexact = mpfr_add (s, u, z, rnd_mode);
 289   MPFR_GROUP_CLEAR (group);
 290   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
 291  end:
 292   MPFR_SAVE_EXPO_FREE (expo);
 293   return mpfr_check_range (s, inexact, rnd_mode);
 294 }