...

# Source file src/math/big/nat.go

```     1	// Copyright 2009 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
4
5	// Package big implements multi-precision arithmetic (big numbers).
6	// The following numeric types are supported:
7	//
8	//	- Int	signed integers
9	//	- Rat	rational numbers
10	//
11	// Methods are typically of the form:
12	//
13	//	func (z *Int) Op(x, y *Int) *Int	(similar for *Rat)
14	//
15	// and implement operations z = x Op y with the result as receiver; if it
16	// is one of the operands it may be overwritten (and its memory reused).
17	// To enable chaining of operations, the result is also returned. Methods
18	// returning a result other than *Int or *Rat take one of the operands as
20	//
21	package big
22
23	// This file contains operations on unsigned multi-precision integers.
24	// These are the building blocks for the operations on signed integers
25	// and rationals.
26
27	import (
28		"errors"
29		"io"
30		"math"
31		"math/rand"
32		"sync"
33	)
34
35	// An unsigned integer x of the form
36	//
37	//   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
38	//
39	// with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
40	// with the digits x[i] as the slice elements.
41	//
42	// A number is normalized if the slice contains no leading 0 digits.
43	// During arithmetic operations, denormalized values may occur but are
44	// always normalized before returning the final result. The normalized
45	// representation of 0 is the empty or nil slice (length = 0).
46	//
47	type nat []Word
48
49	var (
50		natOne = nat{1}
51		natTwo = nat{2}
52		natTen = nat{10}
53	)
54
55	func (z nat) clear() {
56		for i := range z {
57			z[i] = 0
58		}
59	}
60
61	func (z nat) norm() nat {
62		i := len(z)
63		for i > 0 && z[i-1] == 0 {
64			i--
65		}
66		return z[0:i]
67	}
68
69	func (z nat) make(n int) nat {
70		if n <= cap(z) {
71			return z[0:n] // reuse z
72		}
73		// Choosing a good value for e has significant performance impact
74		// because it increases the chance that a value can be reused.
75		const e = 4 // extra capacity
76		return make(nat, n, n+e)
77	}
78
79	func (z nat) setWord(x Word) nat {
80		if x == 0 {
81			return z.make(0)
82		}
83		z = z.make(1)
84		z[0] = x
85		return z
86	}
87
88	func (z nat) setUint64(x uint64) nat {
89		// single-digit values
90		if w := Word(x); uint64(w) == x {
91			return z.setWord(w)
92		}
93
94		// compute number of words n required to represent x
95		n := 0
96		for t := x; t > 0; t >>= _W {
97			n++
98		}
99
100		// split x into n words
101		z = z.make(n)
102		for i := range z {
103			z[i] = Word(x & _M)
104			x >>= _W
105		}
106
107		return z
108	}
109
110	func (z nat) set(x nat) nat {
111		z = z.make(len(x))
112		copy(z, x)
113		return z
114	}
115
116	func (z nat) add(x, y nat) nat {
117		m := len(x)
118		n := len(y)
119
120		switch {
121		case m < n:
123		case m == 0:
124			// n == 0 because m >= n; result is 0
125			return z.make(0)
126		case n == 0:
127			// result is x
128			return z.set(x)
129		}
130		// m > 0
131
132		z = z.make(m + 1)
133		c := addVV(z[0:n], x, y)
134		if m > n {
135			c = addVW(z[n:m], x[n:], c)
136		}
137		z[m] = c
138
139		return z.norm()
140	}
141
142	func (z nat) sub(x, y nat) nat {
143		m := len(x)
144		n := len(y)
145
146		switch {
147		case m < n:
148			panic("underflow")
149		case m == 0:
150			// n == 0 because m >= n; result is 0
151			return z.make(0)
152		case n == 0:
153			// result is x
154			return z.set(x)
155		}
156		// m > 0
157
158		z = z.make(m)
159		c := subVV(z[0:n], x, y)
160		if m > n {
161			c = subVW(z[n:], x[n:], c)
162		}
163		if c != 0 {
164			panic("underflow")
165		}
166
167		return z.norm()
168	}
169
170	func (x nat) cmp(y nat) (r int) {
171		m := len(x)
172		n := len(y)
173		if m != n || m == 0 {
174			switch {
175			case m < n:
176				r = -1
177			case m > n:
178				r = 1
179			}
180			return
181		}
182
183		i := m - 1
184		for i > 0 && x[i] == y[i] {
185			i--
186		}
187
188		switch {
189		case x[i] < y[i]:
190			r = -1
191		case x[i] > y[i]:
192			r = 1
193		}
194		return
195	}
196
197	func (z nat) mulAddWW(x nat, y, r Word) nat {
198		m := len(x)
199		if m == 0 || y == 0 {
200			return z.setWord(r) // result is r
201		}
202		// m > 0
203
204		z = z.make(m + 1)
205		z[m] = mulAddVWW(z[0:m], x, y, r)
206
207		return z.norm()
208	}
209
210	// basicMul multiplies x and y and leaves the result in z.
211	// The (non-normalized) result is placed in z[0 : len(x) + len(y)].
212	func basicMul(z, x, y nat) {
213		z[0 : len(x)+len(y)].clear() // initialize z
214		for i, d := range y {
215			if d != 0 {
216				z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
217			}
218		}
219	}
220
221	// Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
222	// Factored out for readability - do not use outside karatsuba.
223	func karatsubaAdd(z, x nat, n int) {
224		if c := addVV(z[0:n], z, x); c != 0 {
226		}
227	}
228
229	// Like karatsubaAdd, but does subtract.
230	func karatsubaSub(z, x nat, n int) {
231		if c := subVV(z[0:n], z, x); c != 0 {
232			subVW(z[n:n+n>>1], z[n:], c)
233		}
234	}
235
236	// Operands that are shorter than karatsubaThreshold are multiplied using
237	// "grade school" multiplication; for longer operands the Karatsuba algorithm
238	// is used.
239	var karatsubaThreshold int = 40 // computed by calibrate.go
240
241	// karatsuba multiplies x and y and leaves the result in z.
242	// Both x and y must have the same length n and n must be a
243	// power of 2. The result vector z must have len(z) >= 6*n.
244	// The (non-normalized) result is placed in z[0 : 2*n].
245	func karatsuba(z, x, y nat) {
246		n := len(y)
247
248		// Switch to basic multiplication if numbers are odd or small.
249		// (n is always even if karatsubaThreshold is even, but be
250		// conservative)
251		if n&1 != 0 || n < karatsubaThreshold || n < 2 {
252			basicMul(z, x, y)
253			return
254		}
255		// n&1 == 0 && n >= karatsubaThreshold && n >= 2
256
257		// Karatsuba multiplication is based on the observation that
258		// for two numbers x and y with:
259		//
260		//   x = x1*b + x0
261		//   y = y1*b + y0
262		//
263		// the product x*y can be obtained with 3 products z2, z1, z0
265		//
266		//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
267		//       =    z2*b*b +              z1*b +    z0
268		//
269		// with:
270		//
271		//   xd = x1 - x0
272		//   yd = y0 - y1
273		//
274		//   z1 =      xd*yd                    + z2 + z0
275		//      = (x1-x0)*(y0 - y1)             + z2 + z0
276		//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
277		//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
278		//      = x1*y0                 + x0*y1
279
280		// split x, y into "digits"
281		n2 := n >> 1              // n2 >= 1
282		x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
283		y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
284
285		// z is used for the result and temporary storage:
286		//
287		//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
288		// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
289		//
290		// For each recursive call of karatsuba, an unused slice of
291		// z is passed in that has (at least) half the length of the
292		// caller's z.
293
294		// compute z0 and z2 with the result "in place" in z
295		karatsuba(z, x0, y0)     // z0 = x0*y0
296		karatsuba(z[n:], x1, y1) // z2 = x1*y1
297
298		// compute xd (or the negative value if underflow occurs)
299		s := 1 // sign of product xd*yd
300		xd := z[2*n : 2*n+n2]
301		if subVV(xd, x1, x0) != 0 { // x1-x0
302			s = -s
303			subVV(xd, x0, x1) // x0-x1
304		}
305
306		// compute yd (or the negative value if underflow occurs)
307		yd := z[2*n+n2 : 3*n]
308		if subVV(yd, y0, y1) != 0 { // y0-y1
309			s = -s
310			subVV(yd, y1, y0) // y1-y0
311		}
312
313		// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
314		// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
315		p := z[n*3:]
316		karatsuba(p, xd, yd)
317
318		// save original z2:z0
319		// (ok to use upper half of z since we're done recursing)
320		r := z[n*4:]
321		copy(r, z[:n*2])
322
323		// add up all partial products
324		//
325		//   2*n     n     0
326		// z = [ z2  | z0  ]
327		//   +    [ z0  ]
328		//   +    [ z2  ]
329		//   +    [  p  ]
330		//
333		if s > 0 {
335		} else {
336			karatsubaSub(z[n2:], p, n)
337		}
338	}
339
340	// alias returns true if x and y share the same base array.
341	func alias(x, y nat) bool {
342		return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
343	}
344
345	// addAt implements z += x<<(_W*i); z must be long enough.
346	// (we don't use nat.add because we need z to stay the same
347	// slice, and we don't need to normalize z after each addition)
348	func addAt(z, x nat, i int) {
349		if n := len(x); n > 0 {
350			if c := addVV(z[i:i+n], z[i:], x); c != 0 {
351				j := i + n
352				if j < len(z) {
354				}
355			}
356		}
357	}
358
359	func max(x, y int) int {
360		if x > y {
361			return x
362		}
363		return y
364	}
365
366	// karatsubaLen computes an approximation to the maximum k <= n such that
367	// k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
368	// result is the largest number that can be divided repeatedly by 2 before
369	// becoming about the value of karatsubaThreshold.
370	func karatsubaLen(n int) int {
371		i := uint(0)
372		for n > karatsubaThreshold {
373			n >>= 1
374			i++
375		}
376		return n << i
377	}
378
379	func (z nat) mul(x, y nat) nat {
380		m := len(x)
381		n := len(y)
382
383		switch {
384		case m < n:
385			return z.mul(y, x)
386		case m == 0 || n == 0:
387			return z.make(0)
388		case n == 1:
390		}
391		// m >= n > 1
392
393		// determine if z can be reused
394		if alias(z, x) || alias(z, y) {
395			z = nil // z is an alias for x or y - cannot reuse
396		}
397
398		// use basic multiplication if the numbers are small
399		if n < karatsubaThreshold {
400			z = z.make(m + n)
401			basicMul(z, x, y)
402			return z.norm()
403		}
404		// m >= n && n >= karatsubaThreshold && n >= 2
405
406		// determine Karatsuba length k such that
407		//
408		//   x = xh*b + x0  (0 <= x0 < b)
409		//   y = yh*b + y0  (0 <= y0 < b)
410		//   b = 1<<(_W*k)  ("base" of digits xi, yi)
411		//
412		k := karatsubaLen(n)
413		// k <= n
414
415		// multiply x0 and y0 via Karatsuba
416		x0 := x[0:k]              // x0 is not normalized
417		y0 := y[0:k]              // y0 is not normalized
418		z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
419		karatsuba(z, x0, y0)
420		z = z[0 : m+n]  // z has final length but may be incomplete
421		z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
422
423		// If xh != 0 or yh != 0, add the missing terms to z. For
424		//
425		//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
426		//   yh =                         y1*b (0 <= y1 < b)
427		//
428		// the missing terms are
429		//
430		//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
431		//
432		// since all the yi for i > 1 are 0 by choice of k: If any of them
433		// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
434		// be a larger valid threshold contradicting the assumption about k.
435		//
436		if k < n || m != n {
437			var t nat
438
440			x0 := x0.norm()
441			y1 := y[k:]       // y1 is normalized because y is
442			t = t.mul(x0, y1) // update t so we don't lose t's underlying array
444
446			y0 := y0.norm()
447			for i := k; i < len(x); i += k {
448				xi := x[i:]
449				if len(xi) > k {
450					xi = xi[:k]
451				}
452				xi = xi.norm()
453				t = t.mul(xi, y0)
455				t = t.mul(xi, y1)
457			}
458		}
459
460		return z.norm()
461	}
462
463	// mulRange computes the product of all the unsigned integers in the
464	// range [a, b] inclusively. If a > b (empty range), the result is 1.
465	func (z nat) mulRange(a, b uint64) nat {
466		switch {
467		case a == 0:
468			// cut long ranges short (optimization)
469			return z.setUint64(0)
470		case a > b:
471			return z.setUint64(1)
472		case a == b:
473			return z.setUint64(a)
474		case a+1 == b:
475			return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
476		}
477		m := (a + b) / 2
478		return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
479	}
480
481	// q = (x-r)/y, with 0 <= r < y
482	func (z nat) divW(x nat, y Word) (q nat, r Word) {
483		m := len(x)
484		switch {
485		case y == 0:
486			panic("division by zero")
487		case y == 1:
488			q = z.set(x) // result is x
489			return
490		case m == 0:
491			q = z.make(0) // result is 0
492			return
493		}
494		// m > 0
495		z = z.make(m)
496		r = divWVW(z, 0, x, y)
497		q = z.norm()
498		return
499	}
500
501	func (z nat) div(z2, u, v nat) (q, r nat) {
502		if len(v) == 0 {
503			panic("division by zero")
504		}
505
506		if u.cmp(v) < 0 {
507			q = z.make(0)
508			r = z2.set(u)
509			return
510		}
511
512		if len(v) == 1 {
513			var r2 Word
514			q, r2 = z.divW(u, v[0])
515			r = z2.setWord(r2)
516			return
517		}
518
519		q, r = z.divLarge(z2, u, v)
520		return
521	}
522
523	// q = (uIn-r)/v, with 0 <= r < y
524	// Uses z as storage for q, and u as storage for r if possible.
525	// See Knuth, Volume 2, section 4.3.1, Algorithm D.
526	// Preconditions:
527	//    len(v) >= 2
528	//    len(uIn) >= len(v)
529	func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
530		n := len(v)
531		m := len(uIn) - n
532
533		// determine if z can be reused
534		// TODO(gri) should find a better solution - this if statement
535		//           is very costly (see e.g. time pidigits -s -n 10000)
536		if alias(z, uIn) || alias(z, v) {
537			z = nil // z is an alias for uIn or v - cannot reuse
538		}
539		q = z.make(m + 1)
540
541		qhatv := make(nat, n+1)
542		if alias(u, uIn) || alias(u, v) {
543			u = nil // u is an alias for uIn or v - cannot reuse
544		}
545		u = u.make(len(uIn) + 1)
546		u.clear()
547
548		// D1.
550		if shift > 0 {
551			// do not modify v, it may be used by another goroutine simultaneously
552			v1 := make(nat, n)
553			shlVU(v1, v, shift)
554			v = v1
555		}
556		u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
557
558		// D2.
559		for j := m; j >= 0; j-- {
560			// D3.
561			qhat := Word(_M)
562			if u[j+n] != v[n-1] {
563				var rhat Word
564				qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
565
566				// x1 | x2 = q̂v_{n-2}
567				x1, x2 := mulWW(qhat, v[n-2])
568				// test if q̂v_{n-2} > br̂ + u_{j+n-2}
569				for greaterThan(x1, x2, rhat, u[j+n-2]) {
570					qhat--
571					prevRhat := rhat
572					rhat += v[n-1]
573					// v[n-1] >= 0, so this tests for overflow.
574					if rhat < prevRhat {
575						break
576					}
577					x1, x2 = mulWW(qhat, v[n-2])
578				}
579			}
580
581			// D4.
582			qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
583
584			c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
585			if c != 0 {
586				c := addVV(u[j:j+n], u[j:], v)
587				u[j+n] += c
588				qhat--
589			}
590
591			q[j] = qhat
592		}
593
594		q = q.norm()
595		shrVU(u, u, shift)
596		r = u.norm()
597
598		return q, r
599	}
600
601	// Length of x in bits. x must be normalized.
602	func (x nat) bitLen() int {
603		if i := len(x) - 1; i >= 0 {
604			return i*_W + bitLen(x[i])
605		}
606		return 0
607	}
608
609	// MaxBase is the largest number base accepted for string conversions.
610	const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
611
612	func hexValue(ch rune) Word {
613		d := int(MaxBase + 1) // illegal base
614		switch {
615		case '0' <= ch && ch <= '9':
616			d = int(ch - '0')
617		case 'a' <= ch && ch <= 'z':
618			d = int(ch - 'a' + 10)
619		case 'A' <= ch && ch <= 'Z':
620			d = int(ch - 'A' + 10)
621		}
622		return Word(d)
623	}
624
625	// scan sets z to the natural number corresponding to the longest possible prefix
626	// read from r representing an unsigned integer in a given conversion base.
627	// It returns z, the actual conversion base used, and an error, if any. In the
628	// error case, the value of z is undefined. The syntax follows the syntax of
629	// unsigned integer literals in Go.
630	//
631	// The base argument must be 0 or a value from 2 through MaxBase. If the base
632	// is 0, the string prefix determines the actual conversion base. A prefix of
633	// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
634	// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
635	//
636	func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
637		// reject illegal bases
638		if base < 0 || base == 1 || MaxBase < base {
639			return z, 0, errors.New("illegal number base")
640		}
641
643		ch, _, err := r.ReadRune()
644		if err != nil {
645			return z, 0, err
646		}
647
648		// determine base if necessary
649		b := Word(base)
650		if base == 0 {
651			b = 10
652			if ch == '0' {
653				switch ch, _, err = r.ReadRune(); err {
654				case nil:
655					b = 8
656					switch ch {
657					case 'x', 'X':
658						b = 16
659					case 'b', 'B':
660						b = 2
661					}
662					if b == 2 || b == 16 {
663						if ch, _, err = r.ReadRune(); err != nil {
664							return z, 0, err
665						}
666					}
667				case io.EOF:
668					return z.make(0), 10, nil
669				default:
670					return z, 10, err
671				}
672			}
673		}
674
675		// convert string
676		// - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
677		// - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
678		z = z.make(0)
679		bb := Word(1)
680		dd := Word(0)
681		for max := _M / b; ; {
682			d := hexValue(ch)
683			if d >= b {
684				r.UnreadRune() // ch does not belong to number anymore
685				break
686			}
687
688			if bb <= max {
689				bb *= b
690				dd = dd*b + d
691			} else {
692				// bb * b would overflow
693				z = z.mulAddWW(z, bb, dd)
694				bb = b
695				dd = d
696			}
697
698			if ch, _, err = r.ReadRune(); err != nil {
699				if err != io.EOF {
700					return z, int(b), err
701				}
702				break
703			}
704		}
705
706		switch {
707		case bb > 1:
708			// there was at least one mantissa digit
709			z = z.mulAddWW(z, bb, dd)
710		case base == 0 && b == 8:
711			// there was only the octal prefix 0 (possibly followed by digits > 7);
712			// return base 10, not 8
713			return z, 10, nil
714		case base != 0 || b != 8:
715			// there was neither a mantissa digit nor the octal prefix 0
716			return z, int(b), errors.New("syntax error scanning number")
717		}
718
719		return z.norm(), int(b), nil
720	}
721
722	// Character sets for string conversion.
723	const (
724		lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
725		uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
726	)
727
728	// decimalString returns a decimal representation of x.
729	// It calls x.string with the charset "0123456789".
730	func (x nat) decimalString() string {
731		return x.string(lowercaseDigits[0:10])
732	}
733
734	// string converts x to a string using digits from a charset; a digit with
735	// value d is represented by charset[d]. The conversion base is determined
736	// by len(charset), which must be >= 2 and <= 256.
737	func (x nat) string(charset string) string {
738		b := Word(len(charset))
739
740		// special cases
741		switch {
742		case b < 2 || MaxBase > 256:
743			panic("illegal base")
744		case len(x) == 0:
745			return string(charset[0])
746		}
747
748		// allocate buffer for conversion
749		i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most
750		s := make([]byte, i)
751
752		// convert power of two and non power of two bases separately
753		if b == b&-b {
754			// shift is base-b digit size in bits
755			shift := trailingZeroBits(b) // shift > 0 because b >= 2
756			mask := Word(1)<<shift - 1
757			w := x[0]
758			nbits := uint(_W) // number of unprocessed bits in w
759
760			// convert less-significant words
761			for k := 1; k < len(x); k++ {
762				// convert full digits
763				for nbits >= shift {
764					i--
766					w >>= shift
767					nbits -= shift
768				}
769
770				// convert any partial leading digit and advance to next word
771				if nbits == 0 {
772					// no partial digit remaining, just advance
773					w = x[k]
774					nbits = _W
775				} else {
776					// partial digit in current (k-1) and next (k) word
777					w |= x[k] << nbits
778					i--
780
782					w = x[k] >> (shift - nbits)
783					nbits = _W - (shift - nbits)
784				}
785			}
786
787			// convert digits of most-significant word (omit leading zeros)
788			for nbits >= 0 && w != 0 {
789				i--
791				w >>= shift
792				nbits -= shift
793			}
794
795		} else {
796			// determine "big base"; i.e., the largest possible value bb
797			// that is a power of base b and still fits into a Word
798			// (as in 10^19 for 19 decimal digits in a 64bit Word)
799			bb := b      // big base is b**ndigits
800			ndigits := 1 // number of base b digits
801			for max := Word(_M / b); bb <= max; bb *= b {
802				ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M
803			}
804
805			// construct table of successive squares of bb*leafSize to use in subdivisions
806			// result (table != nil) <=> (len(x) > leafSize > 0)
807			table := divisors(len(x), b, ndigits, bb)
808
809			// preserve x, create local copy for use by convertWords
810			q := nat(nil).set(x)
811
812			// convert q to string s in base b
813			q.convertWords(s, charset, b, ndigits, bb, table)
814
816			// (x != 0; thus s must contain at least one non-zero digit
817			// and the loop will terminate)
818			i = 0
819			for zero := charset[0]; s[i] == zero; {
820				i++
821			}
822		}
823
824		return string(s[i:])
825	}
826
827	// Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
828	// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
829	// repeated nat/Word division.
830	//
831	// The iterative method processes n Words by n divW() calls, each of which visits every Word in the
832	// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
833	// Recursive conversion divides q by its approximate square root, yielding two parts, each half
834	// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
835	// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
836	// is made better by splitting the subblocks recursively. Best is to split blocks until one more
837	// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
838	// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
839	// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
840	// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
841	// specific hardware.
842	//
843	func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
844		// split larger blocks recursively
845		if table != nil {
846			// len(q) > leafSize > 0
847			var r nat
848			index := len(table) - 1
849			for len(q) > leafSize {
850				// find divisor close to sqrt(q) if possible, but in any case < q
851				maxLength := q.bitLen()     // ~= log2 q, or at of least largest possible q of this bit length
852				minLength := maxLength >> 1 // ~= log2 sqrt(q)
853				for index > 0 && table[index-1].nbits > minLength {
854					index-- // desired
855				}
856				if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
857					index--
858					if index < 0 {
859						panic("internal inconsistency")
860					}
861				}
862
863				// split q into the two digit number (q'*bbb + r) to form independent subblocks
864				q, r = q.div(r, q, table[index].bbb)
865
866				// convert subblocks and collect results in s[:h] and s[h:]
867				h := len(s) - table[index].ndigits
868				r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
869				s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
870			}
871		}
872
873		// having split any large blocks now process the remaining (small) block iteratively
874		i := len(s)
875		var r Word
876		if b == 10 {
877			// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
878			for len(q) > 0 {
879				// extract least significant, base bb "digit"
880				q, r = q.divW(q, bb)
881				for j := 0; j < ndigits && i > 0; j++ {
882					i--
883					// avoid % computation since r%10 == r - int(r/10)*10;
884					// this appears to be faster for BenchmarkString10000Base10
885					// and smaller strings (but a bit slower for larger ones)
886					t := r / 10
887					s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
888					r = t
889				}
890			}
891		} else {
892			for len(q) > 0 {
893				// extract least significant, base bb "digit"
894				q, r = q.divW(q, bb)
895				for j := 0; j < ndigits && i > 0; j++ {
896					i--
897					s[i] = charset[r%b]
898					r /= b
899				}
900			}
901		}
902
903		// prepend high-order zeroes
904		zero := charset[0]
905		for i > 0 { // while need more leading zeroes
906			i--
907			s[i] = zero
908		}
909	}
910
911	// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
912	// Benchmark and configure leafSize using: go test -bench="Leaf"
913	//   8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
914	//   8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
915	var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
916
917	type divisor struct {
918		bbb     nat // divisor
919		nbits   int // bit length of divisor (discounting leading zeroes) ~= log2(bbb)
920		ndigits int // digit length of divisor in terms of output base digits
921	}
922
923	var cacheBase10 struct {
924		sync.Mutex
925		table [64]divisor // cached divisors for base 10
926	}
927
928	// expWW computes x**y
929	func (z nat) expWW(x, y Word) nat {
930		return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
931	}
932
933	// construct table of powers of bb*leafSize to use in subdivisions
934	func divisors(m int, b Word, ndigits int, bb Word) []divisor {
935		// only compute table when recursive conversion is enabled and x is large
936		if leafSize == 0 || m <= leafSize {
937			return nil
938		}
939
940		// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
941		k := 1
942		for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
943			k++
944		}
945
946		// reuse and extend existing table of divisors or create new table as appropriate
947		var table []divisor // for b == 10, table overlaps with cacheBase10.table
948		if b == 10 {
949			cacheBase10.Lock()
950			table = cacheBase10.table[0:k] // reuse old table for this conversion
951		} else {
952			table = make([]divisor, k) // create new table for this conversion
953		}
954
955		// extend table
956		if table[k-1].ndigits == 0 {
957			// add new entries as needed
958			var larger nat
959			for i := 0; i < k; i++ {
960				if table[i].ndigits == 0 {
961					if i == 0 {
962						table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
963						table[0].ndigits = ndigits * leafSize
964					} else {
965						table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
966						table[i].ndigits = 2 * table[i-1].ndigits
967					}
968
969					// optimization: exploit aggregated extra bits in macro blocks
970					larger = nat(nil).set(table[i].bbb)
971					for mulAddVWW(larger, larger, b, 0) == 0 {
972						table[i].bbb = table[i].bbb.set(larger)
973						table[i].ndigits++
974					}
975
976					table[i].nbits = table[i].bbb.bitLen()
977				}
978			}
979		}
980
981		if b == 10 {
982			cacheBase10.Unlock()
983		}
984
985		return table
986	}
987
988	const deBruijn32 = 0x077CB531
989
990	var deBruijn32Lookup = []byte{
991		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
992		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
993	}
994
995	const deBruijn64 = 0x03f79d71b4ca8b09
996
997	var deBruijn64Lookup = []byte{
998		0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
999		62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
1000		63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
1001		54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
1002	}
1003
1004	// trailingZeroBits returns the number of consecutive least significant zero
1005	// bits of x.
1006	func trailingZeroBits(x Word) uint {
1007		// x & -x leaves only the right-most bit set in the word. Let k be the
1008		// index of that bit. Since only a single bit is set, the value is two
1009		// to the power of k. Multiplying by a power of two is equivalent to
1010		// left shifting, in this case by k bits.  The de Bruijn constant is
1011		// such that all six bit, consecutive substrings are distinct.
1012		// Therefore, if we have a left shifted version of this constant we can
1013		// find by how many bits it was shifted by looking at which six bit
1014		// substring ended up at the top of the word.
1015		// (Knuth, volume 4, section 7.3.1)
1016		switch _W {
1017		case 32:
1018			return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
1019		case 64:
1020			return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
1021		default:
1022			panic("unknown word size")
1023		}
1024	}
1025
1026	// trailingZeroBits returns the number of consecutive least significant zero
1027	// bits of x.
1028	func (x nat) trailingZeroBits() uint {
1029		if len(x) == 0 {
1030			return 0
1031		}
1032		var i uint
1033		for x[i] == 0 {
1034			i++
1035		}
1036		// x[i] != 0
1037		return i*_W + trailingZeroBits(x[i])
1038	}
1039
1040	// z = x << s
1041	func (z nat) shl(x nat, s uint) nat {
1042		m := len(x)
1043		if m == 0 {
1044			return z.make(0)
1045		}
1046		// m > 0
1047
1048		n := m + int(s/_W)
1049		z = z.make(n + 1)
1050		z[n] = shlVU(z[n-m:n], x, s%_W)
1051		z[0 : n-m].clear()
1052
1053		return z.norm()
1054	}
1055
1056	// z = x >> s
1057	func (z nat) shr(x nat, s uint) nat {
1058		m := len(x)
1059		n := m - int(s/_W)
1060		if n <= 0 {
1061			return z.make(0)
1062		}
1063		// n > 0
1064
1065		z = z.make(n)
1066		shrVU(z, x[m-n:], s%_W)
1067
1068		return z.norm()
1069	}
1070
1071	func (z nat) setBit(x nat, i uint, b uint) nat {
1072		j := int(i / _W)
1073		m := Word(1) << (i % _W)
1074		n := len(x)
1075		switch b {
1076		case 0:
1077			z = z.make(n)
1078			copy(z, x)
1079			if j >= n {
1080				// no need to grow
1081				return z
1082			}
1083			z[j] &^= m
1084			return z.norm()
1085		case 1:
1086			if j >= n {
1087				z = z.make(j + 1)
1088				z[n:].clear()
1089			} else {
1090				z = z.make(n)
1091			}
1092			copy(z, x)
1093			z[j] |= m
1094			// no need to normalize
1095			return z
1096		}
1097		panic("set bit is not 0 or 1")
1098	}
1099
1100	func (z nat) bit(i uint) uint {
1101		j := int(i / _W)
1102		if j >= len(z) {
1103			return 0
1104		}
1105		return uint(z[j] >> (i % _W) & 1)
1106	}
1107
1108	func (z nat) and(x, y nat) nat {
1109		m := len(x)
1110		n := len(y)
1111		if m > n {
1112			m = n
1113		}
1114		// m <= n
1115
1116		z = z.make(m)
1117		for i := 0; i < m; i++ {
1118			z[i] = x[i] & y[i]
1119		}
1120
1121		return z.norm()
1122	}
1123
1124	func (z nat) andNot(x, y nat) nat {
1125		m := len(x)
1126		n := len(y)
1127		if n > m {
1128			n = m
1129		}
1130		// m >= n
1131
1132		z = z.make(m)
1133		for i := 0; i < n; i++ {
1134			z[i] = x[i] &^ y[i]
1135		}
1136		copy(z[n:m], x[n:m])
1137
1138		return z.norm()
1139	}
1140
1141	func (z nat) or(x, y nat) nat {
1142		m := len(x)
1143		n := len(y)
1144		s := x
1145		if m < n {
1146			n, m = m, n
1147			s = y
1148		}
1149		// m >= n
1150
1151		z = z.make(m)
1152		for i := 0; i < n; i++ {
1153			z[i] = x[i] | y[i]
1154		}
1155		copy(z[n:m], s[n:m])
1156
1157		return z.norm()
1158	}
1159
1160	func (z nat) xor(x, y nat) nat {
1161		m := len(x)
1162		n := len(y)
1163		s := x
1164		if m < n {
1165			n, m = m, n
1166			s = y
1167		}
1168		// m >= n
1169
1170		z = z.make(m)
1171		for i := 0; i < n; i++ {
1172			z[i] = x[i] ^ y[i]
1173		}
1174		copy(z[n:m], s[n:m])
1175
1176		return z.norm()
1177	}
1178
1179	// greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
1180	func greaterThan(x1, x2, y1, y2 Word) bool {
1181		return x1 > y1 || x1 == y1 && x2 > y2
1182	}
1183
1184	// modW returns x % d.
1185	func (x nat) modW(d Word) (r Word) {
1186		// TODO(agl): we don't actually need to store the q value.
1187		var q nat
1188		q = q.make(len(x))
1189		return divWVW(q, 0, x, d)
1190	}
1191
1192	// random creates a random integer in [0..limit), using the space in z if
1193	// possible. n is the bit length of limit.
1194	func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
1195		if alias(z, limit) {
1196			z = nil // z is an alias for limit - cannot reuse
1197		}
1198		z = z.make(len(limit))
1199
1200		bitLengthOfMSW := uint(n % _W)
1201		if bitLengthOfMSW == 0 {
1202			bitLengthOfMSW = _W
1203		}
1204		mask := Word((1 << bitLengthOfMSW) - 1)
1205
1206		for {
1207			switch _W {
1208			case 32:
1209				for i := range z {
1210					z[i] = Word(rand.Uint32())
1211				}
1212			case 64:
1213				for i := range z {
1214					z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
1215				}
1216			default:
1217				panic("unknown word size")
1218			}
1220			if z.cmp(limit) < 0 {
1221				break
1222			}
1223		}
1224
1225		return z.norm()
1226	}
1227
1228	// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
1229	// otherwise it sets z to x**y. The result is the value of z.
1230	func (z nat) expNN(x, y, m nat) nat {
1231		if alias(z, x) || alias(z, y) {
1232			// We cannot allow in-place modification of x or y.
1233			z = nil
1234		}
1235
1236		// x**y mod 1 == 0
1237		if len(m) == 1 && m[0] == 1 {
1238			return z.setWord(0)
1239		}
1240		// m == 0 || m > 1
1241
1242		// x**0 == 1
1243		if len(y) == 0 {
1244			return z.setWord(1)
1245		}
1246		// y > 0
1247
1248		if len(m) != 0 {
1249			// We likely end up being as long as the modulus.
1250			z = z.make(len(m))
1251		}
1252		z = z.set(x)
1253
1254		// If the base is non-trivial and the exponent is large, we use
1255		// 4-bit, windowed exponentiation. This involves precomputing 14 values
1256		// (x^2...x^15) but then reduces the number of multiply-reduces by a
1257		// third. Even for a 32-bit exponent, this reduces the number of
1258		// operations.
1259		if len(x) > 1 && len(y) > 1 && len(m) > 0 {
1260			return z.expNNWindowed(x, y, m)
1261		}
1262
1263		v := y[len(y)-1] // v > 0 because y is normalized and y > 0
1264		shift := leadingZeros(v) + 1
1265		v <<= shift
1266		var q nat
1267
1268		const mask = 1 << (_W - 1)
1269
1270		// We walk through the bits of the exponent one by one. Each time we
1271		// see a bit, we square, thus doubling the power. If the bit is a one,
1272		// we also multiply by x, thus adding one to the power.
1273
1274		w := _W - int(shift)
1275		// zz and r are used to avoid allocating in mul and div as
1276		// otherwise the arguments would alias.
1277		var zz, r nat
1278		for j := 0; j < w; j++ {
1279			zz = zz.mul(z, z)
1280			zz, z = z, zz
1281
1282			if v&mask != 0 {
1283				zz = zz.mul(z, x)
1284				zz, z = z, zz
1285			}
1286
1287			if len(m) != 0 {
1288				zz, r = zz.div(r, z, m)
1289				zz, r, q, z = q, z, zz, r
1290			}
1291
1292			v <<= 1
1293		}
1294
1295		for i := len(y) - 2; i >= 0; i-- {
1296			v = y[i]
1297
1298			for j := 0; j < _W; j++ {
1299				zz = zz.mul(z, z)
1300				zz, z = z, zz
1301
1302				if v&mask != 0 {
1303					zz = zz.mul(z, x)
1304					zz, z = z, zz
1305				}
1306
1307				if len(m) != 0 {
1308					zz, r = zz.div(r, z, m)
1309					zz, r, q, z = q, z, zz, r
1310				}
1311
1312				v <<= 1
1313			}
1314		}
1315
1316		return z.norm()
1317	}
1318
1319	// expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
1320	func (z nat) expNNWindowed(x, y, m nat) nat {
1321		// zz and r are used to avoid allocating in mul and div as otherwise
1322		// the arguments would alias.
1323		var zz, r nat
1324
1325		const n = 4
1326		// powers[i] contains x^i.
1327		var powers [1 << n]nat
1328		powers[0] = natOne
1329		powers[1] = x
1330		for i := 2; i < 1<<n; i += 2 {
1331			p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
1332			*p = p.mul(*p2, *p2)
1333			zz, r = zz.div(r, *p, m)
1334			*p, r = r, *p
1335			*p1 = p1.mul(*p, x)
1336			zz, r = zz.div(r, *p1, m)
1337			*p1, r = r, *p1
1338		}
1339
1340		z = z.setWord(1)
1341
1342		for i := len(y) - 1; i >= 0; i-- {
1343			yi := y[i]
1344			for j := 0; j < _W; j += n {
1345				if i != len(y)-1 || j != 0 {
1346					// Unrolled loop for significant performance
1347					// gain.  Use go test -bench=".*" in crypto/rsa
1348					// to check performance before making changes.
1349					zz = zz.mul(z, z)
1350					zz, z = z, zz
1351					zz, r = zz.div(r, z, m)
1352					z, r = r, z
1353
1354					zz = zz.mul(z, z)
1355					zz, z = z, zz
1356					zz, r = zz.div(r, z, m)
1357					z, r = r, z
1358
1359					zz = zz.mul(z, z)
1360					zz, z = z, zz
1361					zz, r = zz.div(r, z, m)
1362					z, r = r, z
1363
1364					zz = zz.mul(z, z)
1365					zz, z = z, zz
1366					zz, r = zz.div(r, z, m)
1367					z, r = r, z
1368				}
1369
1370				zz = zz.mul(z, powers[yi>>(_W-n)])
1371				zz, z = z, zz
1372				zz, r = zz.div(r, z, m)
1373				z, r = r, z
1374
1375				yi <<= n
1376			}
1377		}
1378
1379		return z.norm()
1380	}
1381
1382	// probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
1383	// If it returns true, n is prime with probability 1 - 1/4^reps.
1384	// If it returns false, n is not prime.
1385	func (n nat) probablyPrime(reps int) bool {
1386		if len(n) == 0 {
1387			return false
1388		}
1389
1390		if len(n) == 1 {
1391			if n[0] < 2 {
1392				return false
1393			}
1394
1395			if n[0]%2 == 0 {
1396				return n[0] == 2
1397			}
1398
1399			// We have to exclude these cases because we reject all
1400			// multiples of these numbers below.
1401			switch n[0] {
1402			case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
1403				return true
1404			}
1405		}
1406
1407		const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
1408		const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
1409
1410		var r Word
1411		switch _W {
1412		case 32:
1413			r = n.modW(primesProduct32)
1414		case 64:
1415			r = n.modW(primesProduct64 & _M)
1416		default:
1417			panic("Unknown word size")
1418		}
1419
1420		if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
1421			r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
1422			return false
1423		}
1424
1425		if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
1426			r%43 == 0 || r%47 == 0 || r%53 == 0) {
1427			return false
1428		}
1429
1430		nm1 := nat(nil).sub(n, natOne)
1431		// determine q, k such that nm1 = q << k
1432		k := nm1.trailingZeroBits()
1433		q := nat(nil).shr(nm1, k)
1434
1435		nm3 := nat(nil).sub(nm1, natTwo)
1436		rand := rand.New(rand.NewSource(int64(n[0])))
1437
1438		var x, y, quotient nat
1439		nm3Len := nm3.bitLen()
1440
1441	NextRandom:
1442		for i := 0; i < reps; i++ {
1443			x = x.random(rand, nm3, nm3Len)
1445			y = y.expNN(x, q, n)
1446			if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
1447				continue
1448			}
1449			for j := uint(1); j < k; j++ {
1450				y = y.mul(y, y)
1451				quotient, y = quotient.div(y, y, n)
1452				if y.cmp(nm1) == 0 {
1453					continue NextRandom
1454				}
1455				if y.cmp(natOne) == 0 {
1456					return false
1457				}
1458			}
1459			return false
1460		}
1461
1462		return true
1463	}
1464
1465	// bytes writes the value of z into buf using big-endian encoding.
1466	// len(buf) must be >= len(z)*_S. The value of z is encoded in the
1467	// slice buf[i:]. The number i of unused bytes at the beginning of
1468	// buf is returned as result.
1469	func (z nat) bytes(buf []byte) (i int) {
1470		i = len(buf)
1471		for _, d := range z {
1472			for j := 0; j < _S; j++ {
1473				i--
1474				buf[i] = byte(d)
1475				d >>= 8
1476			}
1477		}
1478
1479		for i < len(buf) && buf[i] == 0 {
1480			i++
1481		}
1482
1483		return
1484	}
1485
1486	// setBytes interprets buf as the bytes of a big-endian unsigned
1487	// integer, sets z to that value, and returns z.
1488	func (z nat) setBytes(buf []byte) nat {
1489		z = z.make((len(buf) + _S - 1) / _S)
1490
1491		k := 0
1492		s := uint(0)
1493		var d Word
1494		for i := len(buf); i > 0; i-- {
1495			d |= Word(buf[i-1]) << s
1496			if s += 8; s == _S*8 {
1497				z[k] = d
1498				k++
1499				s = 0
1500				d = 0
1501			}
1502		}
1503		if k < len(z) {
1504			z[k] = d
1505		}
1506
1507		return z.norm()
1508	}
1509
```

View as plain text