mpn/generic/get_str.c

   1 /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
   2
   3    Contributed to the GNU project by Torbjorn Granlund.
   4
   5    THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE
   6    INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
   7    FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
   8    GNU MP RELEASE.
   9
  10 Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
  11 2008 Free Software Foundation, Inc.
  12
  13 This file is part of the GNU MP Library.
  14
  15 The GNU MP Library is free software; you can redistribute it and/or modify
  16 it under the terms of the GNU Lesser General Public License as published by
  17 the Free Software Foundation; either version 3 of the License, or (at your
  18 option) any later version.
  19
  20 The GNU MP Library is distributed in the hope that it will be useful, but
  21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  22 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  23 License for more details.
  24
  25 You should have received a copy of the GNU Lesser General Public License
  26 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  27
  28 #include "gmp.h"
  29 #include "gmp-impl.h"
  30 #include "longlong.h"
  31
  32 /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
  33    base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
  34
  35   A) Divide U repeatedly by B, generating a quotient and remainder, until the
  36      quotient becomes zero.  The remainders hold the converted digits.  Digits
  37      come out from right to left.  (Used in mpn_sb_get_str.)
  38
  39   B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
  40      Then develop digits by multiplying the fraction repeatedly by b.  Digits
  41      come out from left to right.  (Currently not used herein, except for in
  42      code for converting single limbs to individual digits.)
  43
  44   C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
  45      sqrt(U).  Then divide U by B^s, generating quotient and remainder.
  46      Recursively convert the quotient, then the remainder, using the
  47      precomputed powers.  Digits come out from left to right.  (Used in
  48      mpn_dc_get_str.)
  49
  50   When using algorithm C, algorithm B might be suitable for basecase code,
  51   since the required b^g power will be readily accessible.
  52
  53   Optimization ideas:
  54   1. The recursive function of (C) could use less temporary memory.  The powtab
  55      allocation could be trimmed with some computation, and the tmp area could
  56      be reduced, or perhaps eliminated if up is reused for both quotient and
  57      remainder (it is currently used just for remainder).
  58   2. Store the powers of (C) in normalized form, with the normalization count.
  59      Quotients will usually need to be left-shifted before each divide, and
  60      remainders will either need to be left-shifted of right-shifted.
  61   3. In the code for developing digits from a single limb, we could avoid using
  62      a full umul_ppmm except for the first (or first few) digits, provided base
  63      is even.  Subsequent digits can be developed using plain multiplication.
  64      (This saves on register-starved machines (read x86) and on all machines
  65      that generate the upper product half using a separate instruction (alpha,
  66      powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
  67   4. Separate mpn_dc_get_str basecase code from code for small conversions. The
  68      former code will have the exact right power readily available in the
  69      powtab parameter for dividing the current number into a fraction.  Convert
  70      that using algorithm B.
  71   5. Completely avoid division.  Compute the inverses of the powers now in
  72      powtab instead of the actual powers.
  73   6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
  74      about 30% (1-log(5)/log(10)).
  75
  76   Basic structure of (C):
  77     mpn_get_str:
  78       if POW2_P (n)
  79         ...
  80       else
  81         if (un < GET_STR_PRECOMPUTE_THRESHOLD)
  82           mpn_sb_get_str (str, base, up, un);
  83         else
  84           precompute_power_tables
  85           mpn_dc_get_str
  86
  87     mpn_dc_get_str:
  88         mpn_tdiv_qr
  89         if (qn < GET_STR_DC_THRESHOLD)
  90           mpn_sb_get_str
  91         else
  92           mpn_dc_get_str
  93         if (rn < GET_STR_DC_THRESHOLD)
  94           mpn_sb_get_str
  95         else
  96           mpn_dc_get_str
  97
  98
  99   The reason for the two threshold values is the cost of
 100   precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
 101   larger than GET_STR_PRECOMPUTE_THRESHOLD.  */
 102
 103
 104 /* The x86s and m68020 have a quotient and remainder "div" instruction and
 105    gcc recognises an adjacent "/" and "%" can be combined using that.
 106    Elsewhere "/" and "%" are either separate instructions, or separate
 107    libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
 108    A multiply and subtract should be faster than a "%" in those cases.  */
 109 #if HAVE_HOST_CPU_FAMILY_x86            \
 110   || HAVE_HOST_CPU_m68020               \
 111   || HAVE_HOST_CPU_m68030               \
 112   || HAVE_HOST_CPU_m68040               \
 113   || HAVE_HOST_CPU_m68060               \
 114   || HAVE_HOST_CPU_m68360 /* CPU32 */
 115 #define udiv_qrnd_unnorm(q,r,n,d)       \
 116   do {                                  \
 117     mp_limb_t  __q = (n) / (d);         \
 118     mp_limb_t  __r = (n) % (d);         \
 119     (q) = __q;                          \
 120     (r) = __r;                          \
 121   } while (0)
 122 #else
 123 #define udiv_qrnd_unnorm(q,r,n,d)       \
 124   do {                                  \
 125     mp_limb_t  __q = (n) / (d);         \
 126     mp_limb_t  __r = (n) - __q*(d);     \
 127     (q) = __q;                          \
 128     (r) = __r;                          \
 129   } while (0)
 130 #endif
 131
 132 \f
 133 /* Convert {up,un} to a string in base base, and put the result in str.
 134    Generate len characters, possibly padding with zeros to the left.  If len is
 135    zero, generate as many characters as required.  Return a pointer immediately
 136    after the last digit of the result string.  Complexity is O(un^2); intended
 137    for small conversions.  */
 138 static unsigned char *
 139 mpn_sb_get_str (unsigned char *str, size_t len,
 140                 mp_ptr up, mp_size_t un, int base)
 141 {
 142   mp_limb_t rl, ul;
 143   unsigned char *s;
 144   size_t l;
 145   /* Allocate memory for largest possible string, given that we only get here
 146      for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
 147      base is 3.  7/11 is an approximation to 1/log2(3).  */
 148 #if TUNE_PROGRAM_BUILD
 149 #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
 150 #else
 151 #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
 152 #endif
 153   unsigned char buf[BUF_ALLOC];
 154 #if TUNE_PROGRAM_BUILD
 155   mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
 156 #else
 157   mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
 158 #endif
 159
 160   if (base == 10)
 161     {
 162       /* Special case code for base==10 so that the compiler has a chance to
 163          optimize things.  */
 164
 165       MPN_COPY (rp + 1, up, un);
 166
 167       s = buf + BUF_ALLOC;
 168       while (un > 1)
 169         {
 170           int i;
 171           mp_limb_t frac, digit;
 172           MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
 173                                          MP_BASES_BIG_BASE_10,
 174                                          MP_BASES_BIG_BASE_INVERTED_10,
 175                                          MP_BASES_NORMALIZATION_STEPS_10);
 176           un -= rp[un] == 0;
 177           frac = (rp[0] + 1) << GMP_NAIL_BITS;
 178           s -= MP_BASES_CHARS_PER_LIMB_10;
 179 #if HAVE_HOST_CPU_FAMILY_x86
 180           /* The code below turns out to be a bit slower for x86 using gcc.
 181              Use plain code.  */
 182           i = MP_BASES_CHARS_PER_LIMB_10;
 183           do
 184             {
 185               umul_ppmm (digit, frac, frac, 10);
 186               *s++ = digit;
 187             }
 188           while (--i);
 189 #else
 190           /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
 191              After a few umul_ppmm, we will have accumulated enough low zeros
 192              to use a plain multiply.  */
 193           if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
 194             {
 195               umul_ppmm (digit, frac, frac, 10);
 196               *s++ = digit;
 197             }
 198           if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
 199             {
 200               umul_ppmm (digit, frac, frac, 10);
 201               *s++ = digit;
 202             }
 203           if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
 204             {
 205               umul_ppmm (digit, frac, frac, 10);
 206               *s++ = digit;
 207             }
 208           if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
 209             {
 210               umul_ppmm (digit, frac, frac, 10);
 211               *s++ = digit;
 212             }
 213           i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
 214                                              ? (4-MP_BASES_NORMALIZATION_STEPS_10)
 215                                              : 0));
 216           frac = (frac + 0xf) >> 4;
 217           do
 218             {
 219               frac *= 10;
 220               digit = frac >> (GMP_LIMB_BITS - 4);
 221               *s++ = digit;
 222               frac &= (~(mp_limb_t) 0) >> 4;
 223             }
 224           while (--i);
 225 #endif
 226           s -= MP_BASES_CHARS_PER_LIMB_10;
 227         }
 228
 229       ul = rp[1];
 230       while (ul != 0)
 231         {
 232           udiv_qrnd_unnorm (ul, rl, ul, 10);
 233           *--s = rl;
 234         }
 235     }
 236   else /* not base 10 */
 237     {
 238       unsigned chars_per_limb;
 239       mp_limb_t big_base, big_base_inverted;
 240       unsigned normalization_steps;
 241
 242       chars_per_limb = mp_bases[base].chars_per_limb;
 243       big_base = mp_bases[base].big_base;
 244       big_base_inverted = mp_bases[base].big_base_inverted;
 245       count_leading_zeros (normalization_steps, big_base);
 246
 247       MPN_COPY (rp + 1, up, un);
 248
 249       s = buf + BUF_ALLOC;
 250       while (un > 1)
 251         {
 252           int i;
 253           mp_limb_t frac;
 254           MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
 255                                          big_base, big_base_inverted,
 256                                          normalization_steps);
 257           un -= rp[un] == 0;
 258           frac = (rp[0] + 1) << GMP_NAIL_BITS;
 259           s -= chars_per_limb;
 260           i = chars_per_limb;
 261           do
 262             {
 263               mp_limb_t digit;
 264               umul_ppmm (digit, frac, frac, base);
 265               *s++ = digit;
 266             }
 267           while (--i);
 268           s -= chars_per_limb;
 269         }
 270
 271       ul = rp[1];
 272       while (ul != 0)
 273         {
 274           udiv_qrnd_unnorm (ul, rl, ul, base);
 275           *--s = rl;
 276         }
 277     }
 278
 279   l = buf + BUF_ALLOC - s;
 280   while (l < len)
 281     {
 282       *str++ = 0;
 283       len--;
 284     }
 285   while (l != 0)
 286     {
 287       *str++ = *s++;
 288       l--;
 289     }
 290   return str;
 291 }
 292
 293 \f
 294 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
 295    the string in STR.  Generate LEN characters, possibly padding with zeros to
 296    the left.  If LEN is zero, generate as many characters as required.
 297    Return a pointer immediately after the last digit of the result string.
 298    This uses divide-and-conquer and is intended for large conversions.  */
 299 static unsigned char *
 300 mpn_dc_get_str (unsigned char *str, size_t len,
 301                 mp_ptr up, mp_size_t un,
 302                 const powers_t *powtab, mp_ptr tmp)
 303 {
 304   if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
 305     {
 306       if (un != 0)
 307         str = mpn_sb_get_str (str, len, up, un, powtab->base);
 308       else
 309         {
 310           while (len != 0)
 311             {
 312               *str++ = 0;
 313               len--;
 314             }
 315         }
 316     }
 317   else
 318     {
 319       mp_ptr pwp, qp, rp;
 320       mp_size_t pwn, qn;
 321       mp_size_t sn;
 322
 323       pwp = powtab->p;
 324       pwn = powtab->n;
 325       sn = powtab->shift;
 326
 327       if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
 328         {
 329           str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
 330         }
 331       else
 332         {
 333           qp = tmp;             /* (un - pwn + 1) limbs for qp */
 334           rp = up;              /* pwn limbs for rp; overwrite up area */
 335
 336           mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
 337           qn = un - sn - pwn; qn += qp[qn] != 0;                /* quotient size */
 338
 339           ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
 340
 341           if (len != 0)
 342             len = len - powtab->digits_in_base;
 343
 344           str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
 345           str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
 346         }
 347     }
 348   return str;
 349 }
 350
 351 \f
 352 /* There are no leading zeros on the digits generated at str, but that's not
 353    currently a documented feature.  */
 354
 355 size_t
 356 mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
 357 {
 358   mp_ptr powtab_mem, powtab_mem_ptr;
 359   mp_limb_t big_base;
 360   size_t digits_in_base;
 361   powers_t powtab[GMP_LIMB_BITS];
 362   int pi;
 363   mp_size_t n;
 364   mp_ptr p, t;
 365   size_t out_len;
 366   mp_ptr tmp;
 367   TMP_DECL;
 368
 369   /* Special case zero, as the code below doesn't handle it.  */
 370   if (un == 0)
 371     {
 372       str[0] = 0;
 373       return 1;
 374     }
 375
 376   if (POW2_P (base))
 377     {
 378       /* The base is a power of 2.  Convert from most significant end.  */
 379       mp_limb_t n1, n0;
 380       int bits_per_digit = mp_bases[base].big_base;
 381       int cnt;
 382       int bit_pos;
 383       mp_size_t i;
 384       unsigned char *s = str;
 385       mp_bitcnt_t bits;
 386
 387       n1 = up[un - 1];
 388       count_leading_zeros (cnt, n1);
 389
 390       /* BIT_POS should be R when input ends in least significant nibble,
 391          R + bits_per_digit * n when input ends in nth least significant
 392          nibble. */
 393
 394       bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
 395       cnt = bits % bits_per_digit;
 396       if (cnt != 0)
 397         bits += bits_per_digit - cnt;
 398       bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
 399
 400       /* Fast loop for bit output.  */
 401       i = un - 1;
 402       for (;;)
 403         {
 404           bit_pos -= bits_per_digit;
 405           while (bit_pos >= 0)
 406             {
 407               *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
 408               bit_pos -= bits_per_digit;
 409             }
 410           i--;
 411           if (i < 0)
 412             break;
 413           n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
 414           n1 = up[i];
 415           bit_pos += GMP_NUMB_BITS;
 416           *s++ = n0 | (n1 >> bit_pos);
 417         }
 418
 419       return s - str;
 420     }
 421
 422   /* General case.  The base is not a power of 2.  */
 423
 424   if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
 425     return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
 426
 427   TMP_MARK;
 428
 429   /* Allocate one large block for the powers of big_base.  */
 430   powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));
 431   powtab_mem_ptr = powtab_mem;
 432
 433   /* Compute a table of powers, were the largest power is >= sqrt(U).  */
 434
 435   big_base = mp_bases[base].big_base;
 436   digits_in_base = mp_bases[base].chars_per_limb;
 437
 438   {
 439     mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
 440     mp_limb_t cy;
 441     mp_size_t shift;
 442
 443     n_pows = 0;
 444     xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
 445     for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
 446       {
 447         exptab[n_pows] = pn;
 448         n_pows++;
 449       }
 450     exptab[n_pows] = 1;
 451
 452     powtab[0].p = &big_base;
 453     powtab[0].n = 1;
 454     powtab[0].digits_in_base = digits_in_base;
 455     powtab[0].base = base;
 456     powtab[0].shift = 0;
 457
 458     powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
 459     powtab[1].p[0] = big_base;
 460     powtab[1].n = 1;
 461     powtab[1].digits_in_base = digits_in_base;
 462     powtab[1].base = base;
 463     powtab[1].shift = 0;
 464
 465     n = 1;
 466     p = &big_base;
 467     bexp = 1;
 468     shift = 0;
 469     for (pi = 2; pi < n_pows; pi++)
 470       {
 471         t = powtab_mem_ptr;
 472         powtab_mem_ptr += 2 * n + 2;
 473
 474         ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
 475
 476         mpn_sqr (t, p, n);
 477
 478         digits_in_base *= 2;
 479         n *= 2;  n -= t[n - 1] == 0;
 480         bexp *= 2;
 481
 482         if (bexp + 1 < exptab[n_pows - pi])
 483           {
 484             digits_in_base += mp_bases[base].chars_per_limb;
 485             cy = mpn_mul_1 (t, t, n, big_base);
 486             t[n] = cy;
 487             n += cy != 0;
 488             bexp += 1;
 489           }
 490         shift *= 2;
 491         /* Strip low zero limbs.  */
 492         while (t[0] == 0)
 493           {
 494             t++;
 495             n--;
 496             shift++;
 497           }
 498         p = t;
 499         powtab[pi].p = p;
 500         powtab[pi].n = n;
 501         powtab[pi].digits_in_base = digits_in_base;
 502         powtab[pi].base = base;
 503         powtab[pi].shift = shift;
 504       }
 505
 506     for (pi = 1; pi < n_pows; pi++)
 507       {
 508         t = powtab[pi].p;
 509         n = powtab[pi].n;
 510         cy = mpn_mul_1 (t, t, n, big_base);
 511         t[n] = cy;
 512         n += cy != 0;
 513         if (t[0] == 0)
 514           {
 515             powtab[pi].p = t + 1;
 516             n--;
 517             powtab[pi].shift++;
 518           }
 519         powtab[pi].n = n;
 520         powtab[pi].digits_in_base += mp_bases[base].chars_per_limb;
 521       }
 522
 523 #if 0
 524     { int i;
 525       printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
 526       for (i = 0; i < n_pows; i++)
 527         printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);
 528     }
 529 #endif
 530   }
 531
 532   /* Using our precomputed powers, now in powtab[], convert our number.  */
 533   tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
 534   out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
 535   TMP_FREE;
 536
 537   return out_len;
 538 }