arith.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file provides Go implementations of elementary multi-precision
     6  // arithmetic operations on word vectors. These have the suffix _g.
     7  // These are needed for platforms without assembly implementations of these routines.
     8  // This file also contains elementary operations that can be implemented
     9  // sufficiently efficiently in Go.
    10  
    11  package big
    12  
    13  import "math/bits"
    14  
    15  // A Word represents a single digit of a multi-precision unsigned integer.
    16  type Word uint
    17  
    18  const (
    19  	_S = _W / 8 // word size in bytes
    20  
    21  	_W = bits.UintSize // word size in bits
    22  	_B = 1 << _W       // digit base
    23  	_M = _B - 1        // digit mask
    24  )
    25  
    26  // Many of the loops in this file are of the form
    27  //   for i := 0; i < len(z) && i < len(x) && i < len(y); i++
    28  // i < len(z) is the real condition.
    29  // However, checking i < len(x) && i < len(y) as well is faster than
    30  // having the compiler do a bounds check in the body of the loop;
    31  // remarkably it is even faster than hoisting the bounds check
    32  // out of the loop, by doing something like
    33  //   _, _ = x[len(z)-1], y[len(z)-1]
    34  // There are other ways to hoist the bounds check out of the loop,
    35  // but the compiler's BCE isn't powerful enough for them (yet?).
    36  // See the discussion in CL 164966.
    37  
    38  // ----------------------------------------------------------------------------
    39  // Elementary operations on words
    40  //
    41  // These operations are used by the vector operations below.
    42  
    43  // z1<<_W + z0 = x*y
    44  func mulWW(x, y Word) (z1, z0 Word) {
    45  	hi, lo := bits.Mul(uint(x), uint(y))
    46  	return Word(hi), Word(lo)
    47  }
    48  
    49  // z1<<_W + z0 = x*y + c
    50  func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
    51  	hi, lo := bits.Mul(uint(x), uint(y))
    52  	var cc uint
    53  	lo, cc = bits.Add(lo, uint(c), 0)
    54  	return Word(hi + cc), Word(lo)
    55  }
    56  
    57  // nlz returns the number of leading zeros in x.
    58  // Wraps bits.LeadingZeros call for convenience.
    59  func nlz(x Word) uint {
    60  	return uint(bits.LeadingZeros(uint(x)))
    61  }
    62  
    63  // The resulting carry c is either 0 or 1.
    64  func addVV_g(z, x, y []Word) (c Word) {
    65  	// The comment near the top of this file discusses this for loop condition.
    66  	for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
    67  		zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
    68  		z[i] = Word(zi)
    69  		c = Word(cc)
    70  	}
    71  	return
    72  }
    73  
    74  // The resulting carry c is either 0 or 1.
    75  func subVV_g(z, x, y []Word) (c Word) {
    76  	// The comment near the top of this file discusses this for loop condition.
    77  	for i := 0; i < len(z) && i < len(x) && i < len(y); i++ {
    78  		zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
    79  		z[i] = Word(zi)
    80  		c = Word(cc)
    81  	}
    82  	return
    83  }
    84  
    85  // The resulting carry c is either 0 or 1.
    86  func addVW_g(z, x []Word, y Word) (c Word) {
    87  	c = y
    88  	// The comment near the top of this file discusses this for loop condition.
    89  	for i := 0; i < len(z) && i < len(x); i++ {
    90  		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
    91  		z[i] = Word(zi)
    92  		c = Word(cc)
    93  	}
    94  	return
    95  }
    96  
    97  // addVWlarge is addVW, but intended for large z.
    98  // The only difference is that we check on every iteration
    99  // whether we are done with carries,
   100  // and if so, switch to a much faster copy instead.
   101  // This is only a good idea for large z,
   102  // because the overhead of the check and the function call
   103  // outweigh the benefits when z is small.
   104  func addVWlarge(z, x []Word, y Word) (c Word) {
   105  	c = y
   106  	// The comment near the top of this file discusses this for loop condition.
   107  	for i := 0; i < len(z) && i < len(x); i++ {
   108  		if c == 0 {
   109  			copy(z[i:], x[i:])
   110  			return
   111  		}
   112  		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
   113  		z[i] = Word(zi)
   114  		c = Word(cc)
   115  	}
   116  	return
   117  }
   118  
   119  func subVW_g(z, x []Word, y Word) (c Word) {
   120  	c = y
   121  	// The comment near the top of this file discusses this for loop condition.
   122  	for i := 0; i < len(z) && i < len(x); i++ {
   123  		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
   124  		z[i] = Word(zi)
   125  		c = Word(cc)
   126  	}
   127  	return
   128  }
   129  
   130  // subVWlarge is to subVW as addVWlarge is to addVW.
   131  func subVWlarge(z, x []Word, y Word) (c Word) {
   132  	c = y
   133  	// The comment near the top of this file discusses this for loop condition.
   134  	for i := 0; i < len(z) && i < len(x); i++ {
   135  		if c == 0 {
   136  			copy(z[i:], x[i:])
   137  			return
   138  		}
   139  		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
   140  		z[i] = Word(zi)
   141  		c = Word(cc)
   142  	}
   143  	return
   144  }
   145  
   146  func shlVU_g(z, x []Word, s uint) (c Word) {
   147  	if s == 0 {
   148  		copy(z, x)
   149  		return
   150  	}
   151  	if len(z) == 0 {
   152  		return
   153  	}
   154  	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
   155  	ŝ := _W - s
   156  	ŝ &= _W - 1 // ditto
   157  	c = x[len(z)-1] >> ŝ
   158  	for i := len(z) - 1; i > 0; i-- {
   159  		z[i] = x[i]<<s | x[i-1]>>ŝ
   160  	}
   161  	z[0] = x[0] << s
   162  	return
   163  }
   164  
   165  func shrVU_g(z, x []Word, s uint) (c Word) {
   166  	if s == 0 {
   167  		copy(z, x)
   168  		return
   169  	}
   170  	if len(z) == 0 {
   171  		return
   172  	}
   173  	if len(x) != len(z) {
   174  		// This is an invariant guaranteed by the caller.
   175  		panic("len(x) != len(z)")
   176  	}
   177  	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
   178  	ŝ := _W - s
   179  	ŝ &= _W - 1 // ditto
   180  	c = x[0] << ŝ
   181  	for i := 1; i < len(z); i++ {
   182  		z[i-1] = x[i-1]>>s | x[i]<<ŝ
   183  	}
   184  	z[len(z)-1] = x[len(z)-1] >> s
   185  	return
   186  }
   187  
   188  func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
   189  	c = r
   190  	// The comment near the top of this file discusses this for loop condition.
   191  	for i := 0; i < len(z) && i < len(x); i++ {
   192  		c, z[i] = mulAddWWW_g(x[i], y, c)
   193  	}
   194  	return
   195  }
   196  
   197  func addMulVVW_g(z, x []Word, y Word) (c Word) {
   198  	// The comment near the top of this file discusses this for loop condition.
   199  	for i := 0; i < len(z) && i < len(x); i++ {
   200  		z1, z0 := mulAddWWW_g(x[i], y, z[i])
   201  		lo, cc := bits.Add(uint(z0), uint(c), 0)
   202  		c, z[i] = Word(cc), Word(lo)
   203  		c += z1
   204  	}
   205  	return
   206  }
   207  
   208  // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y.
   209  // An approximate reciprocal with a reference to "Improved Division by Invariant Integers
   210  // (IEEE Transactions on Computers, 11 Jun. 2010)"
   211  func divWW(x1, x0, y, m Word) (q, r Word) {
   212  	s := nlz(y)
   213  	if s != 0 {
   214  		x1 = x1<<s | x0>>(_W-s)
   215  		x0 <<= s
   216  		y <<= s
   217  	}
   218  	d := uint(y)
   219  	// We know that
   220  	//   m = ⎣(B^2-1)/d⎦-B
   221  	//   ⎣(B^2-1)/d⎦ = m+B
   222  	//   (B^2-1)/d = m+B+delta1    0 <= delta1 <= (d-1)/d
   223  	//   B^2/d = m+B+delta2        0 <= delta2 <= 1
   224  	// The quotient we're trying to compute is
   225  	//   quotient = ⎣(x1*B+x0)/d⎦
   226  	//            = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
   227  	//            = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
   228  	//            = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
   229  	// The latter two terms of this three-term sum are between 0 and 1.
   230  	// So we can compute just the first term, and we will be low by at most 2.
   231  	t1, t0 := bits.Mul(uint(m), uint(x1))
   232  	_, c := bits.Add(t0, uint(x0), 0)
   233  	t1, _ = bits.Add(t1, uint(x1), c)
   234  	// The quotient is either t1, t1+1, or t1+2.
   235  	// We'll try t1 and adjust if needed.
   236  	qq := t1
   237  	// compute remainder r=x-d*q.
   238  	dq1, dq0 := bits.Mul(d, qq)
   239  	r0, b := bits.Sub(uint(x0), dq0, 0)
   240  	r1, _ := bits.Sub(uint(x1), dq1, b)
   241  	// The remainder we just computed is bounded above by B+d:
   242  	// r = x1*B + x0 - d*q.
   243  	//   = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
   244  	//   = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha)                                   0 <= alpha < 1
   245  	//   = x1*B + x0 - x1*d/B*m                         - x1*d - x0*d/B + d*alpha
   246  	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
   247  	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
   248  	//   = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta)        - x1*d - x0*d/B + d*alpha   0 <= beta < 1
   249  	//   = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
   250  	//   =        x0        + x1/B        + x1*d/B*beta        - x0*d/B + d*alpha
   251  	//   = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
   252  	//   <  B*(1-d/B) +  d*B/B          + d          because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
   253  	//   =  B - d     +  d              + d
   254  	//   = B+d
   255  	// So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
   256  	// Add 1 to q and subtract d from r. That guarantees that r is <B, so
   257  	// we no longer need to keep track of r1.
   258  	if r1 != 0 {
   259  		qq++
   260  		r0 -= d
   261  	}
   262  	// If the remainder is still too large, increment q one more time.
   263  	if r0 >= d {
   264  		qq++
   265  		r0 -= d
   266  	}
   267  	return Word(qq), Word(r0 >> s)
   268  }
   269  
   270  // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
   271  func reciprocalWord(d1 Word) Word {
   272  	u := uint(d1 << nlz(d1))
   273  	x1 := ^u
   274  	x0 := uint(_M)
   275  	rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
   276  	return Word(rec)
   277  }
   278
View as plain text