mpn/generic/tdiv_qr.c

   1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
   2    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
   3    qxn is non-zero, generate that many fraction limbs and append them after the
   4    other quotient limbs, and update the remainder accordingly.  The input
   5    operands are unaffected.
   6
   7    Preconditions:
   8    1. The most significant limb of of the divisor must be non-zero.
   9    2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
  10
  11    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
  12    complexity of multiplication.
  13
  14 Copyright 1997, 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
  15
  16 This file is part of the GNU MP Library.
  17
  18 The GNU MP Library is free software; you can redistribute it and/or modify
  19 it under the terms of the GNU Lesser General Public License as published by
  20 the Free Software Foundation; either version 3 of the License, or (at your
  21 option) any later version.
  22
  23 The GNU MP Library is distributed in the hope that it will be useful, but
  24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  26 License for more details.
  27
  28 You should have received a copy of the GNU Lesser General Public License
  29 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  30
  31 #include "gmp.h"
  32 #include "gmp-impl.h"
  33 #include "longlong.h"
  34
  35
  36 void
  37 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
  38              mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
  39 {
  40   ASSERT_ALWAYS (qxn == 0);
  41
  42   ASSERT (nn >= 0);
  43   ASSERT (dn >= 0);
  44   ASSERT (dn == 0 || dp[dn - 1] != 0);
  45   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
  46   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
  47
  48   switch (dn)
  49     {
  50     case 0:
  51       DIVIDE_BY_ZERO;
  52
  53     case 1:
  54       {
  55         rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
  56         return;
  57       }
  58
  59     case 2:
  60       {
  61         mp_ptr n2p, d2p;
  62         mp_limb_t qhl, cy;
  63         TMP_DECL;
  64         TMP_MARK;
  65         if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
  66           {
  67             int cnt;
  68             mp_limb_t dtmp[2];
  69             count_leading_zeros (cnt, dp[1]);
  70             cnt -= GMP_NAIL_BITS;
  71             d2p = dtmp;
  72             d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
  73             d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
  74             n2p = TMP_ALLOC_LIMBS (nn + 1);
  75             cy = mpn_lshift (n2p, np, nn, cnt);
  76             n2p[nn] = cy;
  77             qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
  78             if (cy == 0)
  79               qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
  80             rp[0] = (n2p[0] >> cnt)
  81               | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
  82             rp[1] = (n2p[1] >> cnt);
  83           }
  84         else
  85           {
  86             d2p = (mp_ptr) dp;
  87             n2p = TMP_ALLOC_LIMBS (nn);
  88             MPN_COPY (n2p, np, nn);
  89             qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
  90             qp[nn - 2] = qhl;   /* always store nn-2+1 quotient limbs */
  91             rp[0] = n2p[0];
  92             rp[1] = n2p[1];
  93           }
  94         TMP_FREE;
  95         return;
  96       }
  97
  98     default:
  99       {
 100         int adjust;
 101         gmp_pi1_t dinv;
 102         TMP_DECL;
 103         TMP_MARK;
 104         adjust = np[nn - 1] >= dp[dn - 1];      /* conservative tests for quotient size */
 105         if (nn + adjust >= 2 * dn)
 106           {
 107             mp_ptr n2p, d2p;
 108             mp_limb_t cy;
 109             int cnt;
 110
 111             qp[nn - dn] = 0;                      /* zero high quotient limb */
 112             if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
 113               {
 114                 count_leading_zeros (cnt, dp[dn - 1]);
 115                 cnt -= GMP_NAIL_BITS;
 116                 d2p = TMP_ALLOC_LIMBS (dn);
 117                 mpn_lshift (d2p, dp, dn, cnt);
 118                 n2p = TMP_ALLOC_LIMBS (nn + 1);
 119                 cy = mpn_lshift (n2p, np, nn, cnt);
 120                 n2p[nn] = cy;
 121                 nn += adjust;
 122               }
 123             else
 124               {
 125                 cnt = 0;
 126                 d2p = (mp_ptr) dp;
 127                 n2p = TMP_ALLOC_LIMBS (nn + 1);
 128                 MPN_COPY (n2p, np, nn);
 129                 n2p[nn] = 0;
 130                 nn += adjust;
 131               }
 132
 133             invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
 134             if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
 135               mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
 136             else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
 137                      BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
 138                      (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
 139                      + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
 140               mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
 141             else
 142               {
 143                 mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
 144                 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
 145                 mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
 146                 n2p = rp;
 147               }
 148
 149             if (cnt != 0)
 150               mpn_rshift (rp, n2p, dn, cnt);
 151             else
 152               MPN_COPY (rp, n2p, dn);
 153             TMP_FREE;
 154             return;
 155           }
 156
 157         /* When we come here, the numerator/partial remainder is less
 158            than twice the size of the denominator.  */
 159
 160           {
 161             /* Problem:
 162
 163                Divide a numerator N with nn limbs by a denominator D with dn
 164                limbs forming a quotient of qn=nn-dn+1 limbs.  When qn is small
 165                compared to dn, conventional division algorithms perform poorly.
 166                We want an algorithm that has an expected running time that is
 167                dependent only on qn.
 168
 169                Algorithm (very informally stated):
 170
 171                1) Divide the 2 x qn most significant limbs from the numerator
 172                   by the qn most significant limbs from the denominator.  Call
 173                   the result qest.  This is either the correct quotient, but
 174                   might be 1 or 2 too large.  Compute the remainder from the
 175                   division.  (This step is implemented by a mpn_divrem call.)
 176
 177                2) Is the most significant limb from the remainder < p, where p
 178                   is the product of the most significant limb from the quotient
 179                   and the next(d)?  (Next(d) denotes the next ignored limb from
 180                   the denominator.)  If it is, decrement qest, and adjust the
 181                   remainder accordingly.
 182
 183                3) Is the remainder >= qest?  If it is, qest is the desired
 184                   quotient.  The algorithm terminates.
 185
 186                4) Subtract qest x next(d) from the remainder.  If there is
 187                   borrow out, decrement qest, and adjust the remainder
 188                   accordingly.
 189
 190                5) Skip one word from the denominator (i.e., let next(d) denote
 191                   the next less significant limb.  */
 192
 193             mp_size_t qn;
 194             mp_ptr n2p, d2p;
 195             mp_ptr tp;
 196             mp_limb_t cy;
 197             mp_size_t in, rn;
 198             mp_limb_t quotient_too_large;
 199             unsigned int cnt;
 200
 201             qn = nn - dn;
 202             qp[qn] = 0;                         /* zero high quotient limb */
 203             qn += adjust;                       /* qn cannot become bigger */
 204
 205             if (qn == 0)
 206               {
 207                 MPN_COPY (rp, np, dn);
 208                 TMP_FREE;
 209                 return;
 210               }
 211
 212             in = dn - qn;               /* (at least partially) ignored # of limbs in ops */
 213             /* Normalize denominator by shifting it to the left such that its
 214                most significant bit is set.  Then shift the numerator the same
 215                amount, to mathematically preserve quotient.  */
 216             if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
 217               {
 218                 count_leading_zeros (cnt, dp[dn - 1]);
 219                 cnt -= GMP_NAIL_BITS;
 220
 221                 d2p = TMP_ALLOC_LIMBS (qn);
 222                 mpn_lshift (d2p, dp + in, qn, cnt);
 223                 d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
 224
 225                 n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
 226                 cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
 227                 if (adjust)
 228                   {
 229                     n2p[2 * qn] = cy;
 230                     n2p++;
 231                   }
 232                 else
 233                   {
 234                     n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
 235                   }
 236               }
 237             else
 238               {
 239                 cnt = 0;
 240                 d2p = (mp_ptr) dp + in;
 241
 242                 n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
 243                 MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
 244                 if (adjust)
 245                   {
 246                     n2p[2 * qn] = 0;
 247                     n2p++;
 248                   }
 249               }
 250
 251             /* Get an approximate quotient using the extracted operands.  */
 252             if (qn == 1)
 253               {
 254                 mp_limb_t q0, r0;
 255                 udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
 256                 n2p[0] = r0 >> GMP_NAIL_BITS;
 257                 qp[0] = q0;
 258               }
 259             else if (qn == 2)
 260               mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
 261             else
 262               {
 263                 invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
 264                 if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
 265                   mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
 266                 else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
 267                   mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
 268                 else
 269                   {
 270                     mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
 271                     mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
 272                     mp_ptr r2p = rp;
 273                     if (np == r2p)      /* If N and R share space, put ... */
 274                       r2p += nn - qn;   /* intermediate remainder at N's upper end. */
 275                     mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
 276                     MPN_COPY (n2p, r2p, qn);
 277                   }
 278               }
 279
 280             rn = qn;
 281             /* Multiply the first ignored divisor limb by the most significant
 282                quotient limb.  If that product is > the partial remainder's
 283                most significant limb, we know the quotient is too large.  This
 284                test quickly catches most cases where the quotient is too large;
 285                it catches all cases where the quotient is 2 too large.  */
 286             {
 287               mp_limb_t dl, x;
 288               mp_limb_t h, dummy;
 289
 290               if (in - 2 < 0)
 291                 dl = 0;
 292               else
 293                 dl = dp[in - 2];
 294
 295 #if GMP_NAIL_BITS == 0
 296               x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
 297 #else
 298               x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
 299               if (cnt != 0)
 300                 x |= dl >> (GMP_NUMB_BITS - cnt);
 301 #endif
 302               umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
 303
 304               if (n2p[qn - 1] < h)
 305                 {
 306                   mp_limb_t cy;
 307
 308                   mpn_decr_u (qp, (mp_limb_t) 1);
 309                   cy = mpn_add_n (n2p, n2p, d2p, qn);
 310                   if (cy)
 311                     {
 312                       /* The partial remainder is safely large.  */
 313                       n2p[qn] = cy;
 314                       ++rn;
 315                     }
 316                 }
 317             }
 318
 319             quotient_too_large = 0;
 320             if (cnt != 0)
 321               {
 322                 mp_limb_t cy1, cy2;
 323
 324                 /* Append partially used numerator limb to partial remainder.  */
 325                 cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
 326                 n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
 327
 328                 /* Update partial remainder with partially used divisor limb.  */
 329                 cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
 330                 if (qn != rn)
 331                   {
 332                     ASSERT_ALWAYS (n2p[qn] >= cy2);
 333                     n2p[qn] -= cy2;
 334                   }
 335                 else
 336                   {
 337                     n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
 338
 339                     quotient_too_large = (cy1 < cy2);
 340                     ++rn;
 341                   }
 342                 --in;
 343               }
 344             /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
 345
 346             tp = TMP_ALLOC_LIMBS (dn);
 347
 348             if (in < qn)
 349               {
 350                 if (in == 0)
 351                   {
 352                     MPN_COPY (rp, n2p, rn);
 353                     ASSERT_ALWAYS (rn == dn);
 354                     goto foo;
 355                   }
 356                 mpn_mul (tp, qp, qn, dp, in);
 357               }
 358             else
 359               mpn_mul (tp, dp, in, qp, qn);
 360
 361             cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
 362             MPN_COPY (rp + in, n2p, dn - in);
 363             quotient_too_large |= cy;
 364             cy = mpn_sub_n (rp, np, tp, in);
 365             cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
 366             quotient_too_large |= cy;
 367           foo:
 368             if (quotient_too_large)
 369               {
 370                 mpn_decr_u (qp, (mp_limb_t) 1);
 371                 mpn_add_n (rp, rp, dp, dn);
 372               }
 373           }
 374         TMP_FREE;
 375         return;
 376       }
 377     }
 378 }