mpn/generic/divrem_1.c

   1 /* mpn_divrem_1 -- mpn by limb division.
   2
   3 Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of the GNU MP Library.
   7
   8 The GNU MP Library is free software; you can redistribute it and/or modify
   9 it under the terms of either:
  10
  11   * the GNU Lesser General Public License as published by the Free
  12     Software Foundation; either version 3 of the License, or (at your
  13     option) any later version.
  14
  15 or
  16
  17   * the GNU General Public License as published by the Free Software
  18     Foundation; either version 2 of the License, or (at your option) any
  19     later version.
  20
  21 or both in parallel, as here.
  22
  23 The GNU MP Library is distributed in the hope that it will be useful, but
  24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  26 for more details.
  27
  28 You should have received copies of the GNU General Public License and the
  29 GNU Lesser General Public License along with the GNU MP Library.  If not,
  30 see https://www.gnu.org/licenses/.  */
  31
  32 #include "gmp.h"
  33 #include "gmp-impl.h"
  34 #include "longlong.h"
  35
  36
  37 /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
  38    meaning the quotient size where that should happen, the quotient size
  39    being how many udiv divisions will be done.
  40
  41    The default is to use preinv always, CPUs where this doesn't suit have
  42    tuned thresholds.  Note in particular that preinv should certainly be
  43    used if that's the only division available (USE_PREINV_ALWAYS).  */
  44
  45 #ifndef DIVREM_1_NORM_THRESHOLD
  46 #define DIVREM_1_NORM_THRESHOLD  0
  47 #endif
  48 #ifndef DIVREM_1_UNNORM_THRESHOLD
  49 #define DIVREM_1_UNNORM_THRESHOLD  0
  50 #endif
  51
  52
  53
  54 /* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
  55    and UNNORM thresholds are 0 and only the inversion code is included.
  56
  57    If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
  58    will be MP_SIZE_T_MAX and only the plain division code is included.
  59
  60    Otherwise mul-by-inverse is better than plain division above some
  61    threshold, and best results are obtained by having code for both present.
  62
  63    The main reason for separating the norm and unnorm cases is that not all
  64    CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
  65    code used on an already normalized divisor.
  66
  67    If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
  68    non-shifting code for both the norm and unnorm cases, though with
  69    different criteria for skipping a division, and with different thresholds
  70    of course.  And in fact if inversion is never viable, then that simple
  71    non-shifting division would be all that's left.
  72
  73    The NORM and UNNORM thresholds might not differ much, but if there's
  74    going to be separate code for norm and unnorm then it makes sense to have
  75    separate thresholds.  One thing that's possible is that the
  76    mul-by-inverse might be better only for normalized divisors, due to that
  77    case not needing variable bit shifts.
  78
  79    Notice that the thresholds are tested after the decision to possibly skip
  80    one divide step, so they're based on the actual number of divisions done.
  81
  82    For the unnorm case, it would be possible to call mpn_lshift to adjust
  83    the dividend all in one go (into the quotient space say), rather than
  84    limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
  85    than what the compiler can generate for EXTRACT.  But this is left to CPU
  86    specific implementations to consider, especially since EXTRACT isn't on
  87    the dependent chain.  */
  88
  89 mp_limb_t
  90 mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
  91               mp_srcptr up, mp_size_t un, mp_limb_t d)
  92 {
  93   mp_size_t  n;
  94   mp_size_t  i;
  95   mp_limb_t  n1, n0;
  96   mp_limb_t  r = 0;
  97
  98   ASSERT (qxn >= 0);
  99   ASSERT (un >= 0);
 100   ASSERT (d != 0);
 101   /* FIXME: What's the correct overlap rule when qxn!=0? */
 102   ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
 103
 104   n = un + qxn;
 105   if (n == 0)
 106     return 0;
 107
 108   d <<= GMP_NAIL_BITS;
 109
 110   qp += (n - 1);   /* Make qp point at most significant quotient limb */
 111
 112   if ((d & GMP_LIMB_HIGHBIT) != 0)
 113     {
 114       if (un != 0)
 115         {
 116           /* High quotient limb is 0 or 1, skip a divide step. */
 117           mp_limb_t q;
 118           r = up[un - 1] << GMP_NAIL_BITS;
 119           q = (r >= d);
 120           *qp-- = q;
 121           r -= (d & -q);
 122           r >>= GMP_NAIL_BITS;
 123           n--;
 124           un--;
 125         }
 126
 127       if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
 128         {
 129         plain:
 130           for (i = un - 1; i >= 0; i--)
 131             {
 132               n0 = up[i] << GMP_NAIL_BITS;
 133               udiv_qrnnd (*qp, r, r, n0, d);
 134               r >>= GMP_NAIL_BITS;
 135               qp--;
 136             }
 137           for (i = qxn - 1; i >= 0; i--)
 138             {
 139               udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
 140               r >>= GMP_NAIL_BITS;
 141               qp--;
 142             }
 143           return r;
 144         }
 145       else
 146         {
 147           /* Multiply-by-inverse, divisor already normalized. */
 148           mp_limb_t dinv;
 149           invert_limb (dinv, d);
 150
 151           for (i = un - 1; i >= 0; i--)
 152             {
 153               n0 = up[i] << GMP_NAIL_BITS;
 154               udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
 155               r >>= GMP_NAIL_BITS;
 156               qp--;
 157             }
 158           for (i = qxn - 1; i >= 0; i--)
 159             {
 160               udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
 161               r >>= GMP_NAIL_BITS;
 162               qp--;
 163             }
 164           return r;
 165         }
 166     }
 167   else
 168     {
 169       /* Most significant bit of divisor == 0.  */
 170       int cnt;
 171
 172       /* Skip a division if high < divisor (high quotient 0).  Testing here
 173          before normalizing will still skip as often as possible.  */
 174       if (un != 0)
 175         {
 176           n1 = up[un - 1] << GMP_NAIL_BITS;
 177           if (n1 < d)
 178             {
 179               r = n1 >> GMP_NAIL_BITS;
 180               *qp-- = 0;
 181               n--;
 182               if (n == 0)
 183                 return r;
 184               un--;
 185             }
 186         }
 187
 188       if (! UDIV_NEEDS_NORMALIZATION
 189           && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
 190         goto plain;
 191
 192       count_leading_zeros (cnt, d);
 193       d <<= cnt;
 194       r <<= cnt;
 195
 196       if (UDIV_NEEDS_NORMALIZATION
 197           && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
 198         {
 199           mp_limb_t nshift;
 200           if (un != 0)
 201             {
 202               n1 = up[un - 1] << GMP_NAIL_BITS;
 203               r |= (n1 >> (GMP_LIMB_BITS - cnt));
 204               for (i = un - 2; i >= 0; i--)
 205                 {
 206                   n0 = up[i] << GMP_NAIL_BITS;
 207                   nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
 208                   udiv_qrnnd (*qp, r, r, nshift, d);
 209                   r >>= GMP_NAIL_BITS;
 210                   qp--;
 211                   n1 = n0;
 212                 }
 213               udiv_qrnnd (*qp, r, r, n1 << cnt, d);
 214               r >>= GMP_NAIL_BITS;
 215               qp--;
 216             }
 217           for (i = qxn - 1; i >= 0; i--)
 218             {
 219               udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
 220               r >>= GMP_NAIL_BITS;
 221               qp--;
 222             }
 223           return r >> cnt;
 224         }
 225       else
 226         {
 227           mp_limb_t  dinv, nshift;
 228           invert_limb (dinv, d);
 229           if (un != 0)
 230             {
 231               n1 = up[un - 1] << GMP_NAIL_BITS;
 232               r |= (n1 >> (GMP_LIMB_BITS - cnt));
 233               for (i = un - 2; i >= 0; i--)
 234                 {
 235                   n0 = up[i] << GMP_NAIL_BITS;
 236                   nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
 237                   udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
 238                   r >>= GMP_NAIL_BITS;
 239                   qp--;
 240                   n1 = n0;
 241                 }
 242               udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
 243               r >>= GMP_NAIL_BITS;
 244               qp--;
 245             }
 246           for (i = qxn - 1; i >= 0; i--)
 247             {
 248               udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
 249               r >>= GMP_NAIL_BITS;
 250               qp--;
 251             }
 252           return r >> cnt;
 253         }
 254     }
 255 }