src/sqr.c

   1 /* mpc_sqr -- Square a complex number.
   2
   3 Copyright (C) 2002, 2005, 2008, 2009, 2010, 2011, 2012 INRIA
   4
   5 This file is part of GNU MPC.
   6
   7 GNU MPC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU Lesser General Public License as published by the
   9 Free Software Foundation; either version 3 of the License, or (at your
  10 option) any later version.
  11
  12 GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  14 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
  15 more details.
  16
  17 You should have received a copy of the GNU Lesser General Public License
  18 along with this program. If not, see http://www.gnu.org/licenses/ .
  19 */
  20
  21 #include <stdio.h>    /* for MPC_ASSERT */
  22 #include "mpc-impl.h"
  23
  24
  25 static int
  26 mpfr_fsss (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr c, mpfr_rnd_t rnd)
  27 {
  28    /* Computes z = a^2 - c^2.
  29       Assumes that a and c are finite and non-zero; so a squaring yielding
  30       an infinity is an overflow, and a squaring yielding 0 is an underflow.
  31       Assumes further that z is distinct from a and c. */
  32
  33    int inex;
  34    mpfr_t u, v;
  35
  36    /* u=a^2, v=c^2 exactly */
  37    mpfr_init2 (u, 2*mpfr_get_prec (a));
  38    mpfr_init2 (v, 2*mpfr_get_prec (c));
  39    mpfr_sqr (u, a, GMP_RNDN);
  40    mpfr_sqr (v, c, GMP_RNDN);
  41
  42    /* tentatively compute z as u-v; here we need z to be distinct
  43       from a and c to not lose the latter */
  44    inex = mpfr_sub (z, u, v, rnd);
  45
  46    if (mpfr_inf_p (z)) {
  47       /* replace by "correctly rounded overflow" */
  48       mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), GMP_RNDN);
  49       inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd);
  50    }
  51    else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) {
  52       /* exactly u underflowed, determine inexact flag */
  53       inex = (mpfr_signbit (u) ? 1 : -1);
  54    }
  55    else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) {
  56       /* exactly v underflowed, determine inexact flag */
  57       inex = (mpfr_signbit (v) ? -1 : 1);
  58    }
  59    else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) {
  60       /* In the first case, u and v are +inf.
  61          In the second case, u and v are zeroes; their difference may be 0
  62          or the least representable number, with a sign to be determined.
  63          Redo the computations with mpz_t exponents */
  64       mpfr_exp_t ea, ec;
  65       mpz_t eu, ev;
  66          /* cheat to work around the const qualifiers */
  67
  68       /* Normalise the input by shifting and keep track of the shifts in
  69          the exponents of u and v */
  70       ea = mpfr_get_exp (a);
  71       ec = mpfr_get_exp (c);
  72
  73       mpfr_set_exp ((mpfr_ptr) a, (mpfr_prec_t) 0);
  74       mpfr_set_exp ((mpfr_ptr) c, (mpfr_prec_t) 0);
  75
  76       mpz_init (eu);
  77       mpz_init (ev);
  78       mpz_set_si (eu, (long int) ea);
  79       mpz_mul_2exp (eu, eu, 1);
  80       mpz_set_si (ev, (long int) ec);
  81       mpz_mul_2exp (ev, ev, 1);
  82
  83       /* recompute u and v and move exponents to eu and ev */
  84       mpfr_sqr (u, a, GMP_RNDN);
  85       /* exponent of u is non-positive */
  86       mpz_sub_ui (eu, eu, (unsigned long int) (-mpfr_get_exp (u)));
  87       mpfr_set_exp (u, (mpfr_prec_t) 0);
  88       mpfr_sqr (v, c, GMP_RNDN);
  89       mpz_sub_ui (ev, ev, (unsigned long int) (-mpfr_get_exp (v)));
  90       mpfr_set_exp (v, (mpfr_prec_t) 0);
  91       if (mpfr_nan_p (z)) {
  92          mpfr_exp_t emax = mpfr_get_emax ();
  93          int overflow;
  94          /* We have a = ma * 2^ea with 1/2 <= |ma| < 1 and ea <= emax.
  95             So eu <= 2*emax, and eu > emax since we have
  96             an overflow. The same holds for ev. Shift u and v by as much as
  97             possible so that one of them has exponent emax and the
  98             remaining exponents in eu and ev are the same. Then carry out
  99             the addition. Shifting u and v prevents an underflow. */
 100          if (mpz_cmp (eu, ev) >= 0) {
 101             mpfr_set_exp (u, emax);
 102             mpz_sub_ui (eu, eu, (long int) emax);
 103             mpz_sub (ev, ev, eu);
 104             mpfr_set_exp (v, (mpfr_exp_t) mpz_get_ui (ev));
 105                /* remaining common exponent is now in eu */
 106          }
 107          else {
 108             mpfr_set_exp (v, emax);
 109             mpz_sub_ui (ev, ev, (long int) emax);
 110             mpz_sub (eu, eu, ev);
 111             mpfr_set_exp (u, (mpfr_exp_t) mpz_get_ui (eu));
 112             mpz_set (eu, ev);
 113                /* remaining common exponent is now also in eu */
 114          }
 115          inex = mpfr_sub (z, u, v, rnd);
 116             /* Result is finite since u and v have the same sign. */
 117          overflow = mpfr_mul_2ui (z, z, mpz_get_ui (eu), rnd);
 118          if (overflow)
 119             inex = overflow;
 120       }
 121       else {
 122          int underflow;
 123          /* Subtraction of two zeroes. We have a = ma * 2^ea
 124             with 1/2 <= |ma| < 1 and ea >= emin and similarly for b.
 125             So 2*emin < 2*emin+1 <= eu < emin < 0, and analogously for v. */
 126          mpfr_exp_t emin = mpfr_get_emin ();
 127          if (mpz_cmp (eu, ev) <= 0) {
 128             mpfr_set_exp (u, emin);
 129             mpz_add_ui (eu, eu, (unsigned long int) (-emin));
 130             mpz_sub (ev, ev, eu);
 131             mpfr_set_exp (v, (mpfr_exp_t) mpz_get_si (ev));
 132          }
 133          else {
 134             mpfr_set_exp (v, emin);
 135             mpz_add_ui (ev, ev, (unsigned long int) (-emin));
 136             mpz_sub (eu, eu, ev);
 137             mpfr_set_exp (u, (mpfr_exp_t) mpz_get_si (eu));
 138             mpz_set (eu, ev);
 139          }
 140          inex = mpfr_sub (z, u, v, rnd);
 141          mpz_neg (eu, eu);
 142          underflow = mpfr_div_2ui (z, z, mpz_get_ui (eu), rnd);
 143          if (underflow)
 144             inex = underflow;
 145       }
 146
 147       mpz_clear (eu);
 148       mpz_clear (ev);
 149
 150       mpfr_set_exp ((mpfr_ptr) a, ea);
 151       mpfr_set_exp ((mpfr_ptr) c, ec);
 152          /* works also when a == c */
 153    }
 154
 155    mpfr_clear (u);
 156    mpfr_clear (v);
 157
 158    return inex;
 159 }
 160
 161
 162 int
 163 mpc_sqr (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
 164 {
 165    int ok;
 166    mpfr_t u, v;
 167    mpfr_t x;
 168       /* temporary variable to hold the real part of op,
 169          needed in the case rop==op */
 170    mpfr_prec_t prec;
 171    int inex_re, inex_im, inexact;
 172    mpfr_exp_t emin;
 173    int saved_underflow;
 174
 175    /* special values: NaN and infinities */
 176    if (!mpc_fin_p (op)) {
 177       if (mpfr_nan_p (mpc_realref (op)) || mpfr_nan_p (mpc_imagref (op))) {
 178          mpfr_set_nan (mpc_realref (rop));
 179          mpfr_set_nan (mpc_imagref (rop));
 180       }
 181       else if (mpfr_inf_p (mpc_realref (op))) {
 182          if (mpfr_inf_p (mpc_imagref (op))) {
 183             mpfr_set_inf (mpc_imagref (rop),
 184                           MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op)));
 185             mpfr_set_nan (mpc_realref (rop));
 186          }
 187          else {
 188             if (mpfr_zero_p (mpc_imagref (op)))
 189                mpfr_set_nan (mpc_imagref (rop));
 190             else
 191                mpfr_set_inf (mpc_imagref (rop),
 192                              MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op)));
 193             mpfr_set_inf (mpc_realref (rop), +1);
 194          }
 195       }
 196       else /* IM(op) is infinity, RE(op) is not */ {
 197          if (mpfr_zero_p (mpc_realref (op)))
 198             mpfr_set_nan (mpc_imagref (rop));
 199          else
 200             mpfr_set_inf (mpc_imagref (rop),
 201                           MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op)));
 202          mpfr_set_inf (mpc_realref (rop), -1);
 203       }
 204       return MPC_INEX (0, 0); /* exact */
 205    }
 206
 207    prec = MPC_MAX_PREC(rop);
 208
 209    /* Check for real resp. purely imaginary number */
 210    if (mpfr_zero_p (mpc_imagref(op))) {
 211       int same_sign = mpfr_signbit (mpc_realref (op)) == mpfr_signbit (mpc_imagref (op));
 212       inex_re = mpfr_sqr (mpc_realref(rop), mpc_realref(op), MPC_RND_RE(rnd));
 213       inex_im = mpfr_set_ui (mpc_imagref(rop), 0ul, GMP_RNDN);
 214       if (!same_sign)
 215         mpc_conj (rop, rop, MPC_RNDNN);
 216       return MPC_INEX(inex_re, inex_im);
 217    }
 218    if (mpfr_zero_p (mpc_realref(op))) {
 219       int same_sign = mpfr_signbit (mpc_realref (op)) == mpfr_signbit (mpc_imagref (op));
 220       inex_re = -mpfr_sqr (mpc_realref(rop), mpc_imagref(op), INV_RND (MPC_RND_RE(rnd)));
 221       mpfr_neg (mpc_realref(rop), mpc_realref(rop), GMP_RNDN);
 222       inex_im = mpfr_set_ui (mpc_imagref(rop), 0ul, GMP_RNDN);
 223       if (!same_sign)
 224         mpc_conj (rop, rop, MPC_RNDNN);
 225       return MPC_INEX(inex_re, inex_im);
 226    }
 227
 228    if (rop == op)
 229    {
 230       mpfr_init2 (x, MPC_PREC_RE (op));
 231       mpfr_set (x, op->re, GMP_RNDN);
 232    }
 233    else
 234       x [0] = op->re [0];
 235    /* From here on, use x instead of op->re and safely overwrite rop->re. */
 236
 237    /* Compute real part of result. */
 238    if (SAFE_ABS (mpfr_exp_t,
 239                  mpfr_get_exp (mpc_realref (op)) - mpfr_get_exp (mpc_imagref (op)))
 240        > (mpfr_exp_t) MPC_MAX_PREC (op) / 2) {
 241       /* If the real and imaginary parts of the argument have very different
 242          exponents, it is not reasonable to use Karatsuba squaring; compute
 243          exactly with the standard formulae instead, even if this means an
 244          additional multiplication. Using the approach copied from mul, over-
 245          and underflows are also handled correctly. */
 246
 247       inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd));
 248    }
 249    else {
 250       /* Karatsuba squaring: we compute the real part as (x+y)*(x-y) and the
 251          imaginary part as 2*x*y, with a total of 2M instead of 2S+1M for the
 252          naive algorithm, which computes x^2-y^2 and 2*y*y */
 253       mpfr_init (u);
 254       mpfr_init (v);
 255
 256       emin = mpfr_get_emin ();
 257
 258       do
 259       {
 260          prec += mpc_ceil_log2 (prec) + 5;
 261
 262          mpfr_set_prec (u, prec);
 263          mpfr_set_prec (v, prec);
 264
 265          /* Let op = x + iy. We need u = x+y and v = x-y, rounded away.      */
 266          /* The error is bounded above by 1 ulp.                             */
 267          /* We first let inexact be 1 if the real part is not computed       */
 268          /* exactly and determine the sign later.                            */
 269          inexact =  ROUND_AWAY (mpfr_add (u, x, mpc_imagref (op), MPFR_RNDA), u)
 270                   | ROUND_AWAY (mpfr_sub (v, x, mpc_imagref (op), MPFR_RNDA), v);
 271
 272          /* compute the real part as u*v, rounded away                    */
 273          /* determine also the sign of inex_re                            */
 274
 275          if (mpfr_sgn (u) == 0 || mpfr_sgn (v) == 0) {
 276             /* as we have rounded away, the result is exact */
 277             mpfr_set_ui (mpc_realref (rop), 0, GMP_RNDN);
 278             inex_re = 0;
 279             ok = 1;
 280          }
 281          else {
 282             mpfr_rnd_t rnd_away;
 283                /* FIXME: can be replaced by MPFR_RNDA in mpfr >= 3 */
 284             rnd_away = (mpfr_sgn (u) * mpfr_sgn (v) > 0 ? GMP_RNDU : GMP_RNDD);
 285             inexact |= ROUND_AWAY (mpfr_mul (u, u, v, MPFR_RNDA), u); /* error 5 */
 286             if (mpfr_get_exp (u) == emin || mpfr_inf_p (u)) {
 287                /* under- or overflow */
 288                inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd));
 289                ok = 1;
 290             }
 291             else {
 292                ok = (!inexact) | mpfr_can_round (u, prec - 3,
 293                      rnd_away, GMP_RNDZ,
 294                      MPC_PREC_RE (rop) + (MPC_RND_RE (rnd) == GMP_RNDN));
 295                if (ok) {
 296                   inex_re = mpfr_set (mpc_realref (rop), u, MPC_RND_RE (rnd));
 297                   if (inex_re == 0)
 298                      /* remember that u was already rounded */
 299                      inex_re = inexact;
 300                }
 301             }
 302          }
 303       }
 304       while (!ok);
 305
 306       mpfr_clear (u);
 307       mpfr_clear (v);
 308    }
 309
 310    saved_underflow = mpfr_underflow_p ();
 311    mpfr_clear_underflow ();
 312    inex_im = mpfr_mul (rop->im, x, op->im, MPC_RND_IM (rnd));
 313    if (!mpfr_underflow_p ())
 314       inex_im |= mpfr_mul_2ui (rop->im, rop->im, 1, MPC_RND_IM (rnd));
 315       /* We must not multiply by 2 if rop->im has been set to the smallest
 316          representable number. */
 317    if (saved_underflow)
 318       mpfr_set_underflow ();
 319
 320    if (rop == op)
 321       mpfr_clear (x);
 322
 323    return MPC_INEX (inex_re, inex_im);
 324 }