# Source file src/cmd/compile/internal/ssa/magic.go

## Documentation: cmd/compile/internal/ssa

```     1  // Copyright 2016 The Go Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style
3  // license that can be found in the LICENSE file.
4
5  package ssa
6
7  import (
8  	"math/big"
9  	"math/bits"
10  )
11
12  // So you want to compute x / c for some constant c?
13  // Machine division instructions are slow, so we try to
14  // compute this division with a multiplication + a few
15  // other cheap instructions instead.
16  // (We assume here that c != 0, +/- 1, or +/- 2^i.  Those
17  // cases are easy to handle in different ways).
18
19  // Technique from https://gmplib.org/~tege/divcnst-pldi94.pdf
20
21  // First consider unsigned division.
22  // Our strategy is to precompute 1/c then do
23  //   ⎣x / c⎦ = ⎣x * (1/c)⎦.
24  // 1/c is less than 1, so we can't compute it directly in
25  // integer arithmetic.  Let's instead compute 2^e/c
26  // for a value of e TBD (^ = exponentiation).  Then
27  //   ⎣x / c⎦ = ⎣x * (2^e/c) / 2^e⎦.
28  // Dividing by 2^e is easy.  2^e/c isn't an integer, unfortunately.
29  // So we must approximate it.  Let's call its approximation m.
30  // We'll then compute
31  //   ⎣x * m / 2^e⎦
32  // Which we want to be equal to ⎣x / c⎦ for 0 <= x < 2^n-1
33  // where n is the word size.
34  // Setting x = c gives us c * m >= 2^e.
35  // We'll chose m = ⎡2^e/c⎤ to satisfy that equation.
36  // What remains is to choose e.
37  // Let m = 2^e/c + delta, 0 <= delta < 1
38  //   ⎣x * (2^e/c + delta) / 2^e⎦
39  //   ⎣x / c + x * delta / 2^e⎦
40  // We must have x * delta / 2^e < 1/c so that this
41  // additional term never rounds differently than ⎣x / c⎦ does.
42  // Rearranging,
43  //   2^e > x * delta * c
44  // x can be at most 2^n-1 and delta can be at most 1.
45  // So it is sufficient to have 2^e >= 2^n*c.
46  // So we'll choose e = n + s, with s = ⎡log2(c)⎤.
47  //
48  // An additional complication arises because m has n+1 bits in it.
49  // Hardware restricts us to n bit by n bit multiplies.
50  // We divide into 3 cases:
51  //
52  // Case 1: m is even.
53  //   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
54  //   ⎣x / c⎦ = ⎣x * (m/2) / 2^(n+s-1)⎦
55  //   ⎣x / c⎦ = ⎣x * (m/2) / 2^n / 2^(s-1)⎦
56  //   ⎣x / c⎦ = ⎣⎣x * (m/2) / 2^n⎦ / 2^(s-1)⎦
57  //   multiply + shift
58  //
59  // Case 2: c is even.
60  //   ⎣x / c⎦ = ⎣(x/2) / (c/2)⎦
61  //   ⎣x / c⎦ = ⎣⎣x/2⎦ / (c/2)⎦
62  //     This is just the original problem, with x' = ⎣x/2⎦, c' = c/2, n' = n-1.
63  //       s' = s-1
64  //       m' = ⎡2^(n'+s')/c'⎤
65  //          = ⎡2^(n+s-1)/c⎤
66  //          = ⎡m/2⎤
67  //   ⎣x / c⎦ = ⎣x' * m' / 2^(n'+s')⎦
68  //   ⎣x / c⎦ = ⎣⎣x/2⎦ * ⎡m/2⎤ / 2^(n+s-2)⎦
69  //   ⎣x / c⎦ = ⎣⎣⎣x/2⎦ * ⎡m/2⎤ / 2^n⎦ / 2^(s-2)⎦
70  //   shift + multiply + shift
71  //
72  // Case 3: everything else
73  //   let k = m - 2^n. k fits in n bits.
74  //   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
75  //   ⎣x / c⎦ = ⎣x * (2^n + k) / 2^(n+s)⎦
76  //   ⎣x / c⎦ = ⎣(x + x * k / 2^n) / 2^s⎦
77  //   ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
78  //   ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
79  //   ⎣x / c⎦ = ⎣⎣(x + ⎣x * k / 2^n⎦) / 2⎦ / 2^(s-1)⎦
80  //   multiply + avg + shift
81  //
82  // These can be implemented in hardware using:
83  //  ⎣a * b / 2^n⎦ - aka high n bits of an n-bit by n-bit multiply.
84  //  ⎣(a+b) / 2⎦   - aka "average" of two n-bit numbers.
85  //                  (Not just a regular add & shift because the intermediate result
86  //                   a+b has n+1 bits in it.  Nevertheless, can be done
87  //                   in 2 instructions on x86.)
88
89  // umagicOK reports whether we should strength reduce a n-bit divide by c.
90  func umagicOK(n uint, c int64) bool {
91  	// Convert from ConstX auxint values to the real uint64 constant they represent.
92  	d := uint64(c) << (64 - n) >> (64 - n)
93
94  	// Doesn't work for 0.
95  	// Don't use for powers of 2.
96  	return d&(d-1) != 0
97  }
98
99  type umagicData struct {
100  	s int64  // ⎡log2(c)⎤
101  	m uint64 // ⎡2^(n+s)/c⎤ - 2^n
102  }
103
104  // umagic computes the constants needed to strength reduce unsigned n-bit divides by the constant uint64(c).
105  // The return values satisfy for all 0 <= x < 2^n
106  //  floor(x / uint64(c)) = x * (m + 2^n) >> (n+s)
107  func umagic(n uint, c int64) umagicData {
108  	// Convert from ConstX auxint values to the real uint64 constant they represent.
109  	d := uint64(c) << (64 - n) >> (64 - n)
110
111  	C := new(big.Int).SetUint64(d)
112  	s := C.BitLen()
113  	M := big.NewInt(1)
114  	M.Lsh(M, n+uint(s))     // 2^(n+s)
115  	M.Add(M, C)             // 2^(n+s)+c
116  	M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
117  	M.Div(M, C)             // ⎡2^(n+s)/c⎤
118  	if M.Bit(int(n)) != 1 {
119  		panic("n+1st bit isn't set")
120  	}
121  	M.SetBit(M, int(n), 0)
122  	m := M.Uint64()
123  	return umagicData{s: int64(s), m: m}
124  }
125
126  // For signed division, we use a similar strategy.
127  // First, we enforce a positive c.
128  //   x / c = -(x / (-c))
129  // This will require an additional Neg op for c<0.
130  //
131  // If x is positive we're in a very similar state
132  // to the unsigned case above.  We define:
133  //   s = ⎡log2(c)⎤-1
134  //   m = ⎡2^(n+s)/c⎤
135  // Then
136  //   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
137  // If x is negative we have
138  //   ⎡x / c⎤ = ⎣x * m / 2^(n+s)⎦ + 1
139  // (TODO: derivation?)
140  //
141  // The multiply is a bit odd, as it is a signed n-bit value
142  // times an unsigned n-bit value.  For n smaller than the
143  // word size, we can extend x and m appropriately and use the
144  // signed multiply instruction.  For n == word size,
145  // we must use the signed multiply high and correct
146  // the result by adding x*2^n.
147  //
148  // Adding 1 if x<0 is done by subtracting x>>(n-1).
149
150  func smagicOK(n uint, c int64) bool {
151  	if c < 0 {
152  		// Doesn't work for negative c.
153  		return false
154  	}
155  	// Doesn't work for 0.
156  	// Don't use it for powers of 2.
157  	return c&(c-1) != 0
158  }
159
160  type smagicData struct {
161  	s int64  // ⎡log2(c)⎤-1
162  	m uint64 // ⎡2^(n+s)/c⎤
163  }
164
165  // magic computes the constants needed to strength reduce signed n-bit divides by the constant c.
166  // Must have c>0.
167  // The return values satisfy for all -2^(n-1) <= x < 2^(n-1)
168  //  trunc(x / c) = x * m >> (n+s) + (x < 0 ? 1 : 0)
169  func smagic(n uint, c int64) smagicData {
170  	C := new(big.Int).SetInt64(c)
171  	s := C.BitLen() - 1
172  	M := big.NewInt(1)
173  	M.Lsh(M, n+uint(s))     // 2^(n+s)
174  	M.Add(M, C)             // 2^(n+s)+c
175  	M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
176  	M.Div(M, C)             // ⎡2^(n+s)/c⎤
177  	if M.Bit(int(n)) != 0 {
178  		panic("n+1st bit is set")
179  	}
180  	if M.Bit(int(n-1)) == 0 {
181  		panic("nth bit is not set")
182  	}
183  	m := M.Uint64()
184  	return smagicData{s: int64(s), m: m}
185  }
186
187  // Divisibility x%c == 0 can be checked more efficiently than directly computing
188  // the modulus x%c and comparing against 0.
189  //
190  // The same "Division by invariant integers using multiplication" paper
191  // by Granlund and Montgomery referenced above briefly mentions this method
192  // and it is further elaborated in "Hacker's Delight" by Warren Section 10-17
193  //
194  // The first thing to note is that for odd integers, exact division can be computed
195  // by using the modular inverse with respect to the word size 2^n.
196  //
197  // Given c, compute m such that (c * m) mod 2^n == 1
198  // Then if c divides x (x%c ==0), the quotient is given by q = x/c == x*m mod 2^n
199  //
200  // x can range from 0, c, 2c, 3c, ... ⎣(2^n - 1)/c⎦ * c the maximum multiple
201  // Thus, x*m mod 2^n is 0, 1, 2, 3, ... ⎣(2^n - 1)/c⎦
202  // i.e. the quotient takes all values from zero up to max = ⎣(2^n - 1)/c⎦
203  //
204  // If x is not divisible by c, then x*m mod 2^n must take some larger value than max.
205  //
206  // This gives x*m mod 2^n <= ⎣(2^n - 1)/c⎦ as a test for divisibility
207  // involving one multiplication and compare.
208  //
209  // To extend this to even integers, consider c = d0 * 2^k where d0 is odd.
210  // We can test whether x is divisible by both d0 and 2^k.
211  // For d0, the test is the same as above.  Let m be such that m*d0 mod 2^n == 1
212  // Then x*m mod 2^n <= ⎣(2^n - 1)/d0⎦ is the first test.
213  // The test for divisibility by 2^k is a check for k trailing zeroes.
214  // Note that since d0 is odd, m is odd and thus x*m will have the same number of
215  // trailing zeroes as x.  So the two tests are,
216  //
217  // x*m mod 2^n <= ⎣(2^n - 1)/d0⎦
218  // and x*m ends in k zero bits
219  //
220  // These can be combined into a single comparison by the following
221  // (theorem ZRU in Hacker's Delight) for unsigned integers.
222  //
223  // x <= a and x ends in k zero bits if and only if RotRight(x ,k) <= ⎣a/(2^k)⎦
224  // Where RotRight(x ,k) is right rotation of x by k bits.
225  //
226  // To prove the first direction, x <= a -> ⎣x/(2^k)⎦ <= ⎣a/(2^k)⎦
227  // But since x ends in k zeroes all the rotated bits would be zero too.
228  // So RotRight(x, k) == ⎣x/(2^k)⎦ <= ⎣a/(2^k)⎦
229  //
230  // If x does not end in k zero bits, then RotRight(x, k)
231  // has some non-zero bits in the k highest bits.
232  // ⎣x/(2^k)⎦ has all zeroes in the k highest bits,
233  // so RotRight(x, k) > ⎣x/(2^k)⎦
234  //
235  // Finally, if x > a and has k trailing zero bits, then RotRight(x, k) == ⎣x/(2^k)⎦
236  // and ⎣x/(2^k)⎦ must be greater than ⎣a/(2^k)⎦, that is the top n-k bits of x must
237  // be greater than the top n-k bits of a because the rest of x bits are zero.
238  //
239  // So the two conditions about can be replaced with the single test
240  //
241  // RotRight(x*m mod 2^n, k) <= ⎣(2^n - 1)/c⎦
242  //
243  // Where d0*2^k was replaced by c on the right hand side.
244
245  // uivisibleOK reports whether we should strength reduce a n-bit dividisibilty check by c.
246  func udivisibleOK(n uint, c int64) bool {
247  	// Convert from ConstX auxint values to the real uint64 constant they represent.
248  	d := uint64(c) << (64 - n) >> (64 - n)
249
250  	// Doesn't work for 0.
251  	// Don't use for powers of 2.
252  	return d&(d-1) != 0
253  }
254
255  type udivisibleData struct {
256  	k   int64  // trailingZeros(c)
257  	m   uint64 // m * (c>>k) mod 2^n == 1 multiplicative inverse of odd portion modulo 2^n
258  	max uint64 // ⎣(2^n - 1)/ c⎦ max value to for divisibility
259  }
260
261  func udivisible(n uint, c int64) udivisibleData {
262  	// Convert from ConstX auxint values to the real uint64 constant they represent.
263  	d := uint64(c) << (64 - n) >> (64 - n)
264
265  	k := bits.TrailingZeros64(d)
266  	d0 := d >> uint(k) // the odd portion of the divisor
267
268  	mask := ^uint64(0) >> (64 - n)
269
270  	// Calculate the multiplicative inverse via Newton's method.
271  	// Quadratic convergence doubles the number of correct bits per iteration.
272  	m := d0            // initial guess correct to 3-bits d0*d0 mod 8 == 1
273  	m = m * (2 - m*d0) // 6-bits
274  	m = m * (2 - m*d0) // 12-bits
275  	m = m * (2 - m*d0) // 24-bits
276  	m = m * (2 - m*d0) // 48-bits
277  	m = m * (2 - m*d0) // 96-bits >= 64-bits
278  	m = m & mask
279
280  	max := mask / d
281
282  	return udivisibleData{
283  		k:   int64(k),
284  		m:   m,
285  		max: max,
286  	}
287  }
288
289  // For signed integers, a similar method follows.
290  //
291  // Given c > 1 and odd, compute m such that (c * m) mod 2^n == 1
292  // Then if c divides x (x%c ==0), the quotient is given by q = x/c == x*m mod 2^n
293  //
294  // x can range from ⎡-2^(n-1)/c⎤ * c, ... -c, 0, c, ...  ⎣(2^(n-1) - 1)/c⎦ * c
295  // Thus, x*m mod 2^n is ⎡-2^(n-1)/c⎤, ... -2, -1, 0, 1, 2, ... ⎣(2^(n-1) - 1)/c⎦
296  //
297  // So, x is a multiple of c if and only if:
298  // ⎡-2^(n-1)/c⎤ <= x*m mod 2^n <= ⎣(2^(n-1) - 1)/c⎦
299  //
300  // Since c > 1 and odd, this can be simplified by
301  // ⎡-2^(n-1)/c⎤ == ⎡(-2^(n-1) + 1)/c⎤ == -⎣(2^(n-1) - 1)/c⎦
302  //
303  // -⎣(2^(n-1) - 1)/c⎦ <= x*m mod 2^n <= ⎣(2^(n-1) - 1)/c⎦
304  //
305  // To extend this to even integers, consider c = d0 * 2^k where d0 is odd.
306  // We can test whether x is divisible by both d0 and 2^k.
307  //
308  // Let m be such that (d0 * m) mod 2^n == 1.
309  // Let q = x*m mod 2^n. Then c divides x if:
310  //
311  // -⎣(2^(n-1) - 1)/d0⎦ <= q <= ⎣(2^(n-1) - 1)/d0⎦ and q ends in at least k 0-bits
312  //
313  // To transform this to a single comparison, we use the following theorem (ZRS in Hacker's Delight).
314  //
315  // For a >= 0 the following conditions are equivalent:
316  // 1) -a <= x <= a and x ends in at least k 0-bits
317  // 2) RotRight(x+a', k) <= ⎣2a'/2^k⎦
318  //
319  // Where a' = a & -2^k (a with its right k bits set to zero)
320  //
321  // To see that 1 & 2 are equivalent, note that -a <= x <= a is equivalent to
322  // -a' <= x <= a' if and only if x ends in at least k 0-bits.  Adding -a' to each side gives,
323  // 0 <= x + a' <= 2a' and x + a' ends in at least k 0-bits if and only if x does since a' has
324  // k 0-bits by definition.  We can use theorem ZRU above with x -> x + a' and a -> 2a' giving 1) == 2).
325  //
326  // Let m be such that (d0 * m) mod 2^n == 1.
327  // Let q = x*m mod 2^n.
328  // Let a' = ⎣(2^(n-1) - 1)/d0⎦ & -2^k
329  //
330  // Then the divisibility test is:
331  //
332  // RotRight(q+a', k) <= ⎣2a'/2^k⎦
333  //
334  // Note that the calculation is performed using unsigned integers.
335  // Since a' can have n-1 bits, 2a' may have n bits and there is no risk of overflow.
336
337  // sdivisibleOK reports whether we should strength reduce a n-bit dividisibilty check by c.
338  func sdivisibleOK(n uint, c int64) bool {
339  	if c < 0 {
340  		// Doesn't work for negative c.
341  		return false
342  	}
343  	// Doesn't work for 0.
344  	// Don't use it for powers of 2.
345  	return c&(c-1) != 0
346  }
347
348  type sdivisibleData struct {
349  	k   int64  // trailingZeros(c)
350  	m   uint64 // m * (c>>k) mod 2^n == 1 multiplicative inverse of odd portion modulo 2^n
351  	a   uint64 // ⎣(2^(n-1) - 1)/ (c>>k)⎦ & -(1<<k) additive constant
352  	max uint64 // ⎣(2 a) / (1<<k)⎦ max value to for divisibility
353  }
354
355  func sdivisible(n uint, c int64) sdivisibleData {
356  	d := uint64(c)
357  	k := bits.TrailingZeros64(d)
358  	d0 := d >> uint(k) // the odd portion of the divisor
359
360  	mask := ^uint64(0) >> (64 - n)
361
362  	// Calculate the multiplicative inverse via Newton's method.
363  	// Quadratic convergence doubles the number of correct bits per iteration.
364  	m := d0            // initial guess correct to 3-bits d0*d0 mod 8 == 1
365  	m = m * (2 - m*d0) // 6-bits
366  	m = m * (2 - m*d0) // 12-bits
367  	m = m * (2 - m*d0) // 24-bits
368  	m = m * (2 - m*d0) // 48-bits
369  	m = m * (2 - m*d0) // 96-bits >= 64-bits
370  	m = m & mask
371
372  	a := ((mask >> 1) / d0) & -(1 << uint(k))
373  	max := (2 * a) >> uint(k)
374
375  	return sdivisibleData{
376  		k:   int64(k),
377  		m:   m,
378  		a:   a,
379  		max: max,
380  	}
381  }
382
```

View as plain text