libgo/go/big/nat.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package big implements multi-precision arithmetic (big numbers).
   6 // The following numeric types are supported:
   7 //
   8 //      - Int   signed integers
   9 //      - Rat   rational numbers
  10 //
  11 // All methods on Int take the result as the receiver; if it is one
  12 // of the operands it may be overwritten (and its memory reused).
  13 // To enable chaining of operations, the result is also returned.
  14 //
  15 package big
  16
  17 // This file contains operations on unsigned multi-precision integers.
  18 // These are the building blocks for the operations on signed integers
  19 // and rationals.
  20
  21 import (
  22         "io"
  23         "os"
  24         "rand"
  25 )
  26
  27 // An unsigned integer x of the form
  28 //
  29 //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
  30 //
  31 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
  32 // with the digits x[i] as the slice elements.
  33 //
  34 // A number is normalized if the slice contains no leading 0 digits.
  35 // During arithmetic operations, denormalized values may occur but are
  36 // always normalized before returning the final result. The normalized
  37 // representation of 0 is the empty or nil slice (length = 0).
  38
  39 type nat []Word
  40
  41 var (
  42         natOne = nat{1}
  43         natTwo = nat{2}
  44         natTen = nat{10}
  45 )
  46
  47 func (z nat) clear() {
  48         for i := range z {
  49                 z[i] = 0
  50         }
  51 }
  52
  53 func (z nat) norm() nat {
  54         i := len(z)
  55         for i > 0 && z[i-1] == 0 {
  56                 i--
  57         }
  58         return z[0:i]
  59 }
  60
  61 func (z nat) make(n int) nat {
  62         if n <= cap(z) {
  63                 return z[0:n] // reuse z
  64         }
  65         // Choosing a good value for e has significant performance impact
  66         // because it increases the chance that a value can be reused.
  67         const e = 4 // extra capacity
  68         return make(nat, n, n+e)
  69 }
  70
  71 func (z nat) setWord(x Word) nat {
  72         if x == 0 {
  73                 return z.make(0)
  74         }
  75         z = z.make(1)
  76         z[0] = x
  77         return z
  78 }
  79
  80 func (z nat) setUint64(x uint64) nat {
  81         // single-digit values
  82         if w := Word(x); uint64(w) == x {
  83                 return z.setWord(w)
  84         }
  85
  86         // compute number of words n required to represent x
  87         n := 0
  88         for t := x; t > 0; t >>= _W {
  89                 n++
  90         }
  91
  92         // split x into n words
  93         z = z.make(n)
  94         for i := range z {
  95                 z[i] = Word(x & _M)
  96                 x >>= _W
  97         }
  98
  99         return z
 100 }
 101
 102 func (z nat) set(x nat) nat {
 103         z = z.make(len(x))
 104         copy(z, x)
 105         return z
 106 }
 107
 108 func (z nat) add(x, y nat) nat {
 109         m := len(x)
 110         n := len(y)
 111
 112         switch {
 113         case m < n:
 114                 return z.add(y, x)
 115         case m == 0:
 116                 // n == 0 because m >= n; result is 0
 117                 return z.make(0)
 118         case n == 0:
 119                 // result is x
 120                 return z.set(x)
 121         }
 122         // m > 0
 123
 124         z = z.make(m + 1)
 125         c := addVV(z[0:n], x, y)
 126         if m > n {
 127                 c = addVW(z[n:m], x[n:], c)
 128         }
 129         z[m] = c
 130
 131         return z.norm()
 132 }
 133
 134 func (z nat) sub(x, y nat) nat {
 135         m := len(x)
 136         n := len(y)
 137
 138         switch {
 139         case m < n:
 140                 panic("underflow")
 141         case m == 0:
 142                 // n == 0 because m >= n; result is 0
 143                 return z.make(0)
 144         case n == 0:
 145                 // result is x
 146                 return z.set(x)
 147         }
 148         // m > 0
 149
 150         z = z.make(m)
 151         c := subVV(z[0:n], x, y)
 152         if m > n {
 153                 c = subVW(z[n:], x[n:], c)
 154         }
 155         if c != 0 {
 156                 panic("underflow")
 157         }
 158
 159         return z.norm()
 160 }
 161
 162 func (x nat) cmp(y nat) (r int) {
 163         m := len(x)
 164         n := len(y)
 165         if m != n || m == 0 {
 166                 switch {
 167                 case m < n:
 168                         r = -1
 169                 case m > n:
 170                         r = 1
 171                 }
 172                 return
 173         }
 174
 175         i := m - 1
 176         for i > 0 && x[i] == y[i] {
 177                 i--
 178         }
 179
 180         switch {
 181         case x[i] < y[i]:
 182                 r = -1
 183         case x[i] > y[i]:
 184                 r = 1
 185         }
 186         return
 187 }
 188
 189 func (z nat) mulAddWW(x nat, y, r Word) nat {
 190         m := len(x)
 191         if m == 0 || y == 0 {
 192                 return z.setWord(r) // result is r
 193         }
 194         // m > 0
 195
 196         z = z.make(m + 1)
 197         z[m] = mulAddVWW(z[0:m], x, y, r)
 198
 199         return z.norm()
 200 }
 201
 202 // basicMul multiplies x and y and leaves the result in z.
 203 // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
 204 func basicMul(z, x, y nat) {
 205         z[0 : len(x)+len(y)].clear() // initialize z
 206         for i, d := range y {
 207                 if d != 0 {
 208                         z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
 209                 }
 210         }
 211 }
 212
 213 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
 214 // Factored out for readability - do not use outside karatsuba.
 215 func karatsubaAdd(z, x nat, n int) {
 216         if c := addVV(z[0:n], z, x); c != 0 {
 217                 addVW(z[n:n+n>>1], z[n:], c)
 218         }
 219 }
 220
 221 // Like karatsubaAdd, but does subtract.
 222 func karatsubaSub(z, x nat, n int) {
 223         if c := subVV(z[0:n], z, x); c != 0 {
 224                 subVW(z[n:n+n>>1], z[n:], c)
 225         }
 226 }
 227
 228 // Operands that are shorter than karatsubaThreshold are multiplied using
 229 // "grade school" multiplication; for longer operands the Karatsuba algorithm
 230 // is used.
 231 var karatsubaThreshold int = 32 // computed by calibrate.go
 232
 233 // karatsuba multiplies x and y and leaves the result in z.
 234 // Both x and y must have the same length n and n must be a
 235 // power of 2. The result vector z must have len(z) >= 6*n.
 236 // The (non-normalized) result is placed in z[0 : 2*n].
 237 func karatsuba(z, x, y nat) {
 238         n := len(y)
 239
 240         // Switch to basic multiplication if numbers are odd or small.
 241         // (n is always even if karatsubaThreshold is even, but be
 242         // conservative)
 243         if n&1 != 0 || n < karatsubaThreshold || n < 2 {
 244                 basicMul(z, x, y)
 245                 return
 246         }
 247         // n&1 == 0 && n >= karatsubaThreshold && n >= 2
 248
 249         // Karatsuba multiplication is based on the observation that
 250         // for two numbers x and y with:
 251         //
 252         //   x = x1*b + x0
 253         //   y = y1*b + y0
 254         //
 255         // the product x*y can be obtained with 3 products z2, z1, z0
 256         // instead of 4:
 257         //
 258         //   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
 259         //       =    z2*b*b +              z1*b +    z0
 260         //
 261         // with:
 262         //
 263         //   xd = x1 - x0
 264         //   yd = y0 - y1
 265         //
 266         //   z1 =      xd*yd                    + z1 + z0
 267         //      = (x1-x0)*(y0 - y1)             + z1 + z0
 268         //      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z1 + z0
 269         //      = x1*y0 -    z1 -    z0 + x0*y1 + z1 + z0
 270         //      = x1*y0                 + x0*y1
 271
 272         // split x, y into "digits"
 273         n2 := n >> 1              // n2 >= 1
 274         x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
 275         y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
 276
 277         // z is used for the result and temporary storage:
 278         //
 279         //   6*n     5*n     4*n     3*n     2*n     1*n     0*n
 280         // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
 281         //
 282         // For each recursive call of karatsuba, an unused slice of
 283         // z is passed in that has (at least) half the length of the
 284         // caller's z.
 285
 286         // compute z0 and z2 with the result "in place" in z
 287         karatsuba(z, x0, y0)     // z0 = x0*y0
 288         karatsuba(z[n:], x1, y1) // z2 = x1*y1
 289
 290         // compute xd (or the negative value if underflow occurs)
 291         s := 1 // sign of product xd*yd
 292         xd := z[2*n : 2*n+n2]
 293         if subVV(xd, x1, x0) != 0 { // x1-x0
 294                 s = -s
 295                 subVV(xd, x0, x1) // x0-x1
 296         }
 297
 298         // compute yd (or the negative value if underflow occurs)
 299         yd := z[2*n+n2 : 3*n]
 300         if subVV(yd, y0, y1) != 0 { // y0-y1
 301                 s = -s
 302                 subVV(yd, y1, y0) // y1-y0
 303         }
 304
 305         // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
 306         // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
 307         p := z[n*3:]
 308         karatsuba(p, xd, yd)
 309
 310         // save original z2:z0
 311         // (ok to use upper half of z since we're done recursing)
 312         r := z[n*4:]
 313         copy(r, z)
 314
 315         // add up all partial products
 316         //
 317         //   2*n     n     0
 318         // z = [ z2  | z0  ]
 319         //   +    [ z0  ]
 320         //   +    [ z2  ]
 321         //   +    [  p  ]
 322         //
 323         karatsubaAdd(z[n2:], r, n)
 324         karatsubaAdd(z[n2:], r[n:], n)
 325         if s > 0 {
 326                 karatsubaAdd(z[n2:], p, n)
 327         } else {
 328                 karatsubaSub(z[n2:], p, n)
 329         }
 330 }
 331
 332 // alias returns true if x and y share the same base array.
 333 func alias(x, y nat) bool {
 334         return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
 335 }
 336
 337 // addAt implements z += x*(1<<(_W*i)); z must be long enough.
 338 // (we don't use nat.add because we need z to stay the same
 339 // slice, and we don't need to normalize z after each addition)
 340 func addAt(z, x nat, i int) {
 341         if n := len(x); n > 0 {
 342                 if c := addVV(z[i:i+n], z[i:], x); c != 0 {
 343                         j := i + n
 344                         if j < len(z) {
 345                                 addVW(z[j:], z[j:], c)
 346                         }
 347                 }
 348         }
 349 }
 350
 351 func max(x, y int) int {
 352         if x > y {
 353                 return x
 354         }
 355         return y
 356 }
 357
 358 // karatsubaLen computes an approximation to the maximum k <= n such that
 359 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
 360 // result is the largest number that can be divided repeatedly by 2 before
 361 // becoming about the value of karatsubaThreshold.
 362 func karatsubaLen(n int) int {
 363         i := uint(0)
 364         for n > karatsubaThreshold {
 365                 n >>= 1
 366                 i++
 367         }
 368         return n << i
 369 }
 370
 371 func (z nat) mul(x, y nat) nat {
 372         m := len(x)
 373         n := len(y)
 374
 375         switch {
 376         case m < n:
 377                 return z.mul(y, x)
 378         case m == 0 || n == 0:
 379                 return z.make(0)
 380         case n == 1:
 381                 return z.mulAddWW(x, y[0], 0)
 382         }
 383         // m >= n > 1
 384
 385         // determine if z can be reused
 386         if alias(z, x) || alias(z, y) {
 387                 z = nil // z is an alias for x or y - cannot reuse
 388         }
 389
 390         // use basic multiplication if the numbers are small
 391         if n < karatsubaThreshold || n < 2 {
 392                 z = z.make(m + n)
 393                 basicMul(z, x, y)
 394                 return z.norm()
 395         }
 396         // m >= n && n >= karatsubaThreshold && n >= 2
 397
 398         // determine Karatsuba length k such that
 399         //
 400         //   x = x1*b + x0
 401         //   y = y1*b + y0  (and k <= len(y), which implies k <= len(x))
 402         //   b = 1<<(_W*k)  ("base" of digits xi, yi)
 403         //
 404         k := karatsubaLen(n)
 405         // k <= n
 406
 407         // multiply x0 and y0 via Karatsuba
 408         x0 := x[0:k]              // x0 is not normalized
 409         y0 := y[0:k]              // y0 is not normalized
 410         z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
 411         karatsuba(z, x0, y0)
 412         z = z[0 : m+n] // z has final length but may be incomplete, upper portion is garbage
 413
 414         // If x1 and/or y1 are not 0, add missing terms to z explicitly:
 415         //
 416         //     m+n       2*k       0
 417         //   z = [   ...   | x0*y0 ]
 418         //     +   [ x1*y1 ]
 419         //     +   [ x1*y0 ]
 420         //     +   [ x0*y1 ]
 421         //
 422         if k < n || m != n {
 423                 x1 := x[k:] // x1 is normalized because x is
 424                 y1 := y[k:] // y1 is normalized because y is
 425                 var t nat
 426                 t = t.mul(x1, y1)
 427                 copy(z[2*k:], t)
 428                 z[2*k+len(t):].clear() // upper portion of z is garbage
 429                 t = t.mul(x1, y0.norm())
 430                 addAt(z, t, k)
 431                 t = t.mul(x0.norm(), y1)
 432                 addAt(z, t, k)
 433         }
 434
 435         return z.norm()
 436 }
 437
 438 // mulRange computes the product of all the unsigned integers in the
 439 // range [a, b] inclusively. If a > b (empty range), the result is 1.
 440 func (z nat) mulRange(a, b uint64) nat {
 441         switch {
 442         case a == 0:
 443                 // cut long ranges short (optimization)
 444                 return z.setUint64(0)
 445         case a > b:
 446                 return z.setUint64(1)
 447         case a == b:
 448                 return z.setUint64(a)
 449         case a+1 == b:
 450                 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
 451         }
 452         m := (a + b) / 2
 453         return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
 454 }
 455
 456 // q = (x-r)/y, with 0 <= r < y
 457 func (z nat) divW(x nat, y Word) (q nat, r Word) {
 458         m := len(x)
 459         switch {
 460         case y == 0:
 461                 panic("division by zero")
 462         case y == 1:
 463                 q = z.set(x) // result is x
 464                 return
 465         case m == 0:
 466                 q = z.make(0) // result is 0
 467                 return
 468         }
 469         // m > 0
 470         z = z.make(m)
 471         r = divWVW(z, 0, x, y)
 472         q = z.norm()
 473         return
 474 }
 475
 476 func (z nat) div(z2, u, v nat) (q, r nat) {
 477         if len(v) == 0 {
 478                 panic("division by zero")
 479         }
 480
 481         if u.cmp(v) < 0 {
 482                 q = z.make(0)
 483                 r = z2.set(u)
 484                 return
 485         }
 486
 487         if len(v) == 1 {
 488                 var rprime Word
 489                 q, rprime = z.divW(u, v[0])
 490                 if rprime > 0 {
 491                         r = z2.make(1)
 492                         r[0] = rprime
 493                 } else {
 494                         r = z2.make(0)
 495                 }
 496                 return
 497         }
 498
 499         q, r = z.divLarge(z2, u, v)
 500         return
 501 }
 502
 503 // q = (uIn-r)/v, with 0 <= r < y
 504 // Uses z as storage for q, and u as storage for r if possible.
 505 // See Knuth, Volume 2, section 4.3.1, Algorithm D.
 506 // Preconditions:
 507 //    len(v) >= 2
 508 //    len(uIn) >= len(v)
 509 func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
 510         n := len(v)
 511         m := len(uIn) - n
 512
 513         // determine if z can be reused
 514         // TODO(gri) should find a better solution - this if statement
 515         //           is very costly (see e.g. time pidigits -s -n 10000)
 516         if alias(z, uIn) || alias(z, v) {
 517                 z = nil // z is an alias for uIn or v - cannot reuse
 518         }
 519         q = z.make(m + 1)
 520
 521         qhatv := make(nat, n+1)
 522         if alias(u, uIn) || alias(u, v) {
 523                 u = nil // u is an alias for uIn or v - cannot reuse
 524         }
 525         u = u.make(len(uIn) + 1)
 526         u.clear()
 527
 528         // D1.
 529         shift := leadingZeros(v[n-1])
 530         if shift > 0 {
 531                 // do not modify v, it may be used by another goroutine simultaneously
 532                 v1 := make(nat, n)
 533                 shlVU(v1, v, shift)
 534                 v = v1
 535         }
 536         u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
 537
 538         // D2.
 539         for j := m; j >= 0; j-- {
 540                 // D3.
 541                 qhat := Word(_M)
 542                 if u[j+n] != v[n-1] {
 543                         var rhat Word
 544                         qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
 545
 546                         // x1 | x2 = q̂v_{n-2}
 547                         x1, x2 := mulWW(qhat, v[n-2])
 548                         // test if q̂v_{n-2} > br̂ + u_{j+n-2}
 549                         for greaterThan(x1, x2, rhat, u[j+n-2]) {
 550                                 qhat--
 551                                 prevRhat := rhat
 552                                 rhat += v[n-1]
 553                                 // v[n-1] >= 0, so this tests for overflow.
 554                                 if rhat < prevRhat {
 555                                         break
 556                                 }
 557                                 x1, x2 = mulWW(qhat, v[n-2])
 558                         }
 559                 }
 560
 561                 // D4.
 562                 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
 563
 564                 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
 565                 if c != 0 {
 566                         c := addVV(u[j:j+n], u[j:], v)
 567                         u[j+n] += c
 568                         qhat--
 569                 }
 570
 571                 q[j] = qhat
 572         }
 573
 574         q = q.norm()
 575         shrVU(u, u, shift)
 576         r = u.norm()
 577
 578         return q, r
 579 }
 580
 581 // Length of x in bits. x must be normalized.
 582 func (x nat) bitLen() int {
 583         if i := len(x) - 1; i >= 0 {
 584                 return i*_W + bitLen(x[i])
 585         }
 586         return 0
 587 }
 588
 589 // MaxBase is the largest number base accepted for string conversions.
 590 const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
 591
 592
 593 func hexValue(ch int) Word {
 594         d := MaxBase + 1 // illegal base
 595         switch {
 596         case '0' <= ch && ch <= '9':
 597                 d = ch - '0'
 598         case 'a' <= ch && ch <= 'z':
 599                 d = ch - 'a' + 10
 600         case 'A' <= ch && ch <= 'Z':
 601                 d = ch - 'A' + 10
 602         }
 603         return Word(d)
 604 }
 605
 606 // scan sets z to the natural number corresponding to the longest possible prefix
 607 // read from r representing an unsigned integer in a given conversion base.
 608 // It returns z, the actual conversion base used, and an error, if any. In the
 609 // error case, the value of z is undefined. The syntax follows the syntax of
 610 // unsigned integer literals in Go.
 611 //
 612 // The base argument must be 0 or a value from 2 through MaxBase. If the base
 613 // is 0, the string prefix determines the actual conversion base. A prefix of
 614 // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
 615 // ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
 616 //
 617 func (z nat) scan(r io.RuneScanner, base int) (nat, int, os.Error) {
 618         // reject illegal bases
 619         if base < 0 || base == 1 || MaxBase < base {
 620                 return z, 0, os.NewError("illegal number base")
 621         }
 622
 623         // one char look-ahead
 624         ch, _, err := r.ReadRune()
 625         if err != nil {
 626                 return z, 0, err
 627         }
 628
 629         // determine base if necessary
 630         b := Word(base)
 631         if base == 0 {
 632                 b = 10
 633                 if ch == '0' {
 634                         switch ch, _, err = r.ReadRune(); err {
 635                         case nil:
 636                                 b = 8
 637                                 switch ch {
 638                                 case 'x', 'X':
 639                                         b = 16
 640                                 case 'b', 'B':
 641                                         b = 2
 642                                 }
 643                                 if b == 2 || b == 16 {
 644                                         if ch, _, err = r.ReadRune(); err != nil {
 645                                                 return z, 0, err
 646                                         }
 647                                 }
 648                         case os.EOF:
 649                                 return z.make(0), 10, nil
 650                         default:
 651                                 return z, 10, err
 652                         }
 653                 }
 654         }
 655
 656         // convert string
 657         // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
 658         // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
 659         z = z.make(0)
 660         bb := Word(1)
 661         dd := Word(0)
 662         for max := _M / b; ; {
 663                 d := hexValue(ch)
 664                 if d >= b {
 665                         r.UnreadRune() // ch does not belong to number anymore
 666                         break
 667                 }
 668
 669                 if bb <= max {
 670                         bb *= b
 671                         dd = dd*b + d
 672                 } else {
 673                         // bb * b would overflow
 674                         z = z.mulAddWW(z, bb, dd)
 675                         bb = b
 676                         dd = d
 677                 }
 678
 679                 if ch, _, err = r.ReadRune(); err != nil {
 680                         if err != os.EOF {
 681                                 return z, int(b), err
 682                         }
 683                         break
 684                 }
 685         }
 686
 687         switch {
 688         case bb > 1:
 689                 // there was at least one mantissa digit
 690                 z = z.mulAddWW(z, bb, dd)
 691         case base == 0 && b == 8:
 692                 // there was only the octal prefix 0 (possibly followed by digits > 7);
 693                 // return base 10, not 8
 694                 return z, 10, nil
 695         case base != 0 || b != 8:
 696                 // there was neither a mantissa digit nor the octal prefix 0
 697                 return z, int(b), os.NewError("syntax error scanning number")
 698         }
 699
 700         return z.norm(), int(b), nil
 701 }
 702
 703 // Character sets for string conversion.
 704 const (
 705         lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
 706         uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 707 )
 708
 709 // decimalString returns a decimal representation of x.
 710 // It calls x.string with the charset "0123456789".
 711 func (x nat) decimalString() string {
 712         return x.string(lowercaseDigits[0:10])
 713 }
 714
 715 // string converts x to a string using digits from a charset; a digit with
 716 // value d is represented by charset[d]. The conversion base is determined
 717 // by len(charset), which must be >= 2.
 718 func (x nat) string(charset string) string {
 719         b := Word(len(charset))
 720
 721         // special cases
 722         switch {
 723         case b < 2 || b > 256:
 724                 panic("illegal base")
 725         case len(x) == 0:
 726                 return string(charset[0])
 727         }
 728
 729         // allocate buffer for conversion
 730         i := x.bitLen()/log2(b) + 1 // +1: round up
 731         s := make([]byte, i)
 732
 733         // special case: power of two bases can avoid divisions completely
 734         if b == b&-b {
 735                 // shift is base-b digit size in bits
 736                 shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
 737                 mask := Word(1)<<shift - 1
 738                 w := x[0]
 739                 nbits := uint(_W) // number of unprocessed bits in w
 740
 741                 // convert less-significant words
 742                 for k := 1; k < len(x); k++ {
 743                         // convert full digits
 744                         for nbits >= shift {
 745                                 i--
 746                                 s[i] = charset[w&mask]
 747                                 w >>= shift
 748                                 nbits -= shift
 749                         }
 750
 751                         // convert any partial leading digit and advance to next word
 752                         if nbits == 0 {
 753                                 // no partial digit remaining, just advance
 754                                 w = x[k]
 755                                 nbits = _W
 756                         } else {
 757                                 // partial digit in current (k-1) and next (k) word
 758                                 w |= x[k] << nbits
 759                                 i--
 760                                 s[i] = charset[w&mask]
 761
 762                                 // advance
 763                                 w = x[k] >> (shift - nbits)
 764                                 nbits = _W - (shift - nbits)
 765                         }
 766                 }
 767
 768                 // convert digits of most-significant word (omit leading zeros)
 769                 for nbits >= 0 && w != 0 {
 770                         i--
 771                         s[i] = charset[w&mask]
 772                         w >>= shift
 773                         nbits -= shift
 774                 }
 775
 776                 return string(s[i:])
 777         }
 778
 779         // general case: extract groups of digits by multiprecision division
 780
 781         // maximize ndigits where b**ndigits < 2^_W; bb (big base) is b**ndigits
 782         bb := Word(1)
 783         ndigits := 0
 784         for max := Word(_M / b); bb <= max; bb *= b {
 785                 ndigits++
 786         }
 787
 788         // preserve x, create local copy for use in repeated divisions
 789         q := nat(nil).set(x)
 790         var r Word
 791
 792         // convert
 793         if b == 10 { // hard-coding for 10 here speeds this up by 1.25x
 794                 for len(q) > 0 {
 795                         // extract least significant, base bb "digit"
 796                         q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
 797                         if len(q) == 0 {
 798                                 // skip leading zeros in most-significant group of digits
 799                                 for j := 0; j < ndigits && r != 0; j++ {
 800                                         i--
 801                                         s[i] = charset[r%10]
 802                                         r /= 10
 803                                 }
 804                         } else {
 805                                 for j := 0; j < ndigits; j++ {
 806                                         i--
 807                                         s[i] = charset[r%10]
 808                                         r /= 10
 809                                 }
 810                         }
 811                 }
 812         } else {
 813                 for len(q) > 0 {
 814                         // extract least significant group of digits
 815                         q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
 816                         if len(q) == 0 {
 817                                 // skip leading zeros in most-significant group of digits
 818                                 for j := 0; j < ndigits && r != 0; j++ {
 819                                         i--
 820                                         s[i] = charset[r%b]
 821                                         r /= b
 822                                 }
 823                         } else {
 824                                 for j := 0; j < ndigits; j++ {
 825                                         i--
 826                                         s[i] = charset[r%b]
 827                                         r /= b
 828                                 }
 829                         }
 830                 }
 831         }
 832
 833         return string(s[i:])
 834 }
 835
 836 const deBruijn32 = 0x077CB531
 837
 838 var deBruijn32Lookup = []byte{
 839         0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
 840         31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
 841 }
 842
 843 const deBruijn64 = 0x03f79d71b4ca8b09
 844
 845 var deBruijn64Lookup = []byte{
 846         0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
 847         62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
 848         63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
 849         54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
 850 }
 851
 852 // trailingZeroBits returns the number of consecutive zero bits on the right
 853 // side of the given Word.
 854 // See Knuth, volume 4, section 7.3.1
 855 func trailingZeroBits(x Word) int {
 856         // x & -x leaves only the right-most bit set in the word. Let k be the
 857         // index of that bit. Since only a single bit is set, the value is two
 858         // to the power of k. Multiplying by a power of two is equivalent to
 859         // left shifting, in this case by k bits.  The de Bruijn constant is
 860         // such that all six bit, consecutive substrings are distinct.
 861         // Therefore, if we have a left shifted version of this constant we can
 862         // find by how many bits it was shifted by looking at which six bit
 863         // substring ended up at the top of the word.
 864         switch _W {
 865         case 32:
 866                 return int(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
 867         case 64:
 868                 return int(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
 869         default:
 870                 panic("Unknown word size")
 871         }
 872
 873         return 0
 874 }
 875
 876 // z = x << s
 877 func (z nat) shl(x nat, s uint) nat {
 878         m := len(x)
 879         if m == 0 {
 880                 return z.make(0)
 881         }
 882         // m > 0
 883
 884         n := m + int(s/_W)
 885         z = z.make(n + 1)
 886         z[n] = shlVU(z[n-m:n], x, s%_W)
 887         z[0 : n-m].clear()
 888
 889         return z.norm()
 890 }
 891
 892 // z = x >> s
 893 func (z nat) shr(x nat, s uint) nat {
 894         m := len(x)
 895         n := m - int(s/_W)
 896         if n <= 0 {
 897                 return z.make(0)
 898         }
 899         // n > 0
 900
 901         z = z.make(n)
 902         shrVU(z, x[m-n:], s%_W)
 903
 904         return z.norm()
 905 }
 906
 907 func (z nat) setBit(x nat, i uint, b uint) nat {
 908         j := int(i / _W)
 909         m := Word(1) << (i % _W)
 910         n := len(x)
 911         switch b {
 912         case 0:
 913                 z = z.make(n)
 914                 copy(z, x)
 915                 if j >= n {
 916                         // no need to grow
 917                         return z
 918                 }
 919                 z[j] &^= m
 920                 return z.norm()
 921         case 1:
 922                 if j >= n {
 923                         n = j + 1
 924                 }
 925                 z = z.make(n)
 926                 copy(z, x)
 927                 z[j] |= m
 928                 // no need to normalize
 929                 return z
 930         }
 931         panic("set bit is not 0 or 1")
 932 }
 933
 934 func (z nat) bit(i uint) uint {
 935         j := int(i / _W)
 936         if j >= len(z) {
 937                 return 0
 938         }
 939         return uint(z[j] >> (i % _W) & 1)
 940 }
 941
 942 func (z nat) and(x, y nat) nat {
 943         m := len(x)
 944         n := len(y)
 945         if m > n {
 946                 m = n
 947         }
 948         // m <= n
 949
 950         z = z.make(m)
 951         for i := 0; i < m; i++ {
 952                 z[i] = x[i] & y[i]
 953         }
 954
 955         return z.norm()
 956 }
 957
 958 func (z nat) andNot(x, y nat) nat {
 959         m := len(x)
 960         n := len(y)
 961         if n > m {
 962                 n = m
 963         }
 964         // m >= n
 965
 966         z = z.make(m)
 967         for i := 0; i < n; i++ {
 968                 z[i] = x[i] &^ y[i]
 969         }
 970         copy(z[n:m], x[n:m])
 971
 972         return z.norm()
 973 }
 974
 975 func (z nat) or(x, y nat) nat {
 976         m := len(x)
 977         n := len(y)
 978         s := x
 979         if m < n {
 980                 n, m = m, n
 981                 s = y
 982         }
 983         // m >= n
 984
 985         z = z.make(m)
 986         for i := 0; i < n; i++ {
 987                 z[i] = x[i] | y[i]
 988         }
 989         copy(z[n:m], s[n:m])
 990
 991         return z.norm()
 992 }
 993
 994 func (z nat) xor(x, y nat) nat {
 995         m := len(x)
 996         n := len(y)
 997         s := x
 998         if m < n {
 999                 n, m = m, n
1000                 s = y
1001         }
1002         // m >= n
1003
1004         z = z.make(m)
1005         for i := 0; i < n; i++ {
1006                 z[i] = x[i] ^ y[i]
1007         }
1008         copy(z[n:m], s[n:m])
1009
1010         return z.norm()
1011 }
1012
1013 // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
1014 func greaterThan(x1, x2, y1, y2 Word) bool {
1015         return x1 > y1 || x1 == y1 && x2 > y2
1016 }
1017
1018 // modW returns x % d.
1019 func (x nat) modW(d Word) (r Word) {
1020         // TODO(agl): we don't actually need to store the q value.
1021         var q nat
1022         q = q.make(len(x))
1023         return divWVW(q, 0, x, d)
1024 }
1025
1026 // powersOfTwoDecompose finds q and k with x = q * 1<<k and q is odd, or q and k are 0.
1027 func (x nat) powersOfTwoDecompose() (q nat, k int) {
1028         if len(x) == 0 {
1029                 return x, 0
1030         }
1031
1032         // One of the words must be non-zero by definition,
1033         // so this loop will terminate with i < len(x), and
1034         // i is the number of 0 words.
1035         i := 0
1036         for x[i] == 0 {
1037                 i++
1038         }
1039         n := trailingZeroBits(x[i]) // x[i] != 0
1040
1041         q = make(nat, len(x)-i)
1042         shrVU(q, x[i:], uint(n))
1043
1044         q = q.norm()
1045         k = i*_W + n
1046         return
1047 }
1048
1049 // random creates a random integer in [0..limit), using the space in z if
1050 // possible. n is the bit length of limit.
1051 func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
1052         bitLengthOfMSW := uint(n % _W)
1053         if bitLengthOfMSW == 0 {
1054                 bitLengthOfMSW = _W
1055         }
1056         mask := Word((1 << bitLengthOfMSW) - 1)
1057         z = z.make(len(limit))
1058
1059         for {
1060                 for i := range z {
1061                         switch _W {
1062                         case 32:
1063                                 z[i] = Word(rand.Uint32())
1064                         case 64:
1065                                 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
1066                         }
1067                 }
1068
1069                 z[len(limit)-1] &= mask
1070
1071                 if z.cmp(limit) < 0 {
1072                         break
1073                 }
1074         }
1075
1076         return z.norm()
1077 }
1078
1079 // If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It
1080 // reuses the storage of z if possible.
1081 func (z nat) expNN(x, y, m nat) nat {
1082         if alias(z, x) || alias(z, y) {
1083                 // We cannot allow in place modification of x or y.
1084                 z = nil
1085         }
1086
1087         if len(y) == 0 {
1088                 z = z.make(1)
1089                 z[0] = 1
1090                 return z
1091         }
1092
1093         if m != nil {
1094                 // We likely end up being as long as the modulus.
1095                 z = z.make(len(m))
1096         }
1097         z = z.set(x)
1098         v := y[len(y)-1]
1099         // It's invalid for the most significant word to be zero, therefore we
1100         // will find a one bit.
1101         shift := leadingZeros(v) + 1
1102         v <<= shift
1103         var q nat
1104
1105         const mask = 1 << (_W - 1)
1106
1107         // We walk through the bits of the exponent one by one. Each time we
1108         // see a bit, we square, thus doubling the power. If the bit is a one,
1109         // we also multiply by x, thus adding one to the power.
1110
1111         w := _W - int(shift)
1112         for j := 0; j < w; j++ {
1113                 z = z.mul(z, z)
1114
1115                 if v&mask != 0 {
1116                         z = z.mul(z, x)
1117                 }
1118
1119                 if m != nil {
1120                         q, z = q.div(z, z, m)
1121                 }
1122
1123                 v <<= 1
1124         }
1125
1126         for i := len(y) - 2; i >= 0; i-- {
1127                 v = y[i]
1128
1129                 for j := 0; j < _W; j++ {
1130                         z = z.mul(z, z)
1131
1132                         if v&mask != 0 {
1133                                 z = z.mul(z, x)
1134                         }
1135
1136                         if m != nil {
1137                                 q, z = q.div(z, z, m)
1138                         }
1139
1140                         v <<= 1
1141                 }
1142         }
1143
1144         return z
1145 }
1146
1147 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
1148 // If it returns true, n is prime with probability 1 - 1/4^reps.
1149 // If it returns false, n is not prime.
1150 func (n nat) probablyPrime(reps int) bool {
1151         if len(n) == 0 {
1152                 return false
1153         }
1154
1155         if len(n) == 1 {
1156                 if n[0] < 2 {
1157                         return false
1158                 }
1159
1160                 if n[0]%2 == 0 {
1161                         return n[0] == 2
1162                 }
1163
1164                 // We have to exclude these cases because we reject all
1165                 // multiples of these numbers below.
1166                 switch n[0] {
1167                 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
1168                         return true
1169                 }
1170         }
1171
1172         const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
1173         const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
1174
1175         var r Word
1176         switch _W {
1177         case 32:
1178                 r = n.modW(primesProduct32)
1179         case 64:
1180                 r = n.modW(primesProduct64 & _M)
1181         default:
1182                 panic("Unknown word size")
1183         }
1184
1185         if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
1186                 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
1187                 return false
1188         }
1189
1190         if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
1191                 r%43 == 0 || r%47 == 0 || r%53 == 0) {
1192                 return false
1193         }
1194
1195         nm1 := nat(nil).sub(n, natOne)
1196         // 1<<k * q = nm1;
1197         q, k := nm1.powersOfTwoDecompose()
1198
1199         nm3 := nat(nil).sub(nm1, natTwo)
1200         rand := rand.New(rand.NewSource(int64(n[0])))
1201
1202         var x, y, quotient nat
1203         nm3Len := nm3.bitLen()
1204
1205 NextRandom:
1206         for i := 0; i < reps; i++ {
1207                 x = x.random(rand, nm3, nm3Len)
1208                 x = x.add(x, natTwo)
1209                 y = y.expNN(x, q, n)
1210                 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
1211                         continue
1212                 }
1213                 for j := 1; j < k; j++ {
1214                         y = y.mul(y, y)
1215                         quotient, y = quotient.div(y, y, n)
1216                         if y.cmp(nm1) == 0 {
1217                                 continue NextRandom
1218                         }
1219                         if y.cmp(natOne) == 0 {
1220                                 return false
1221                         }
1222                 }
1223                 return false
1224         }
1225
1226         return true
1227 }
1228
1229 // bytes writes the value of z into buf using big-endian encoding.
1230 // len(buf) must be >= len(z)*_S. The value of z is encoded in the
1231 // slice buf[i:]. The number i of unused bytes at the beginning of
1232 // buf is returned as result.
1233 func (z nat) bytes(buf []byte) (i int) {
1234         i = len(buf)
1235         for _, d := range z {
1236                 for j := 0; j < _S; j++ {
1237                         i--
1238                         buf[i] = byte(d)
1239                         d >>= 8
1240                 }
1241         }
1242
1243         for i < len(buf) && buf[i] == 0 {
1244                 i++
1245         }
1246
1247         return
1248 }
1249
1250 // setBytes interprets buf as the bytes of a big-endian unsigned
1251 // integer, sets z to that value, and returns z.
1252 func (z nat) setBytes(buf []byte) nat {
1253         z = z.make((len(buf) + _S - 1) / _S)
1254
1255         k := 0
1256         s := uint(0)
1257         var d Word
1258         for i := len(buf); i > 0; i-- {
1259                 d |= Word(buf[i-1]) << s
1260                 if s += 8; s == _S*8 {
1261                         z[k] = d
1262                         k++
1263                         s = 0
1264                         d = 0
1265                 }
1266         }
1267         if k < len(z) {
1268                 z[k] = d
1269         }
1270
1271         return z.norm()
1272 }