Source file src/runtime/internal/sys/intrinsics.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package sys
     6  
     7  // Copied from math/bits to avoid dependence.
     8  
     9  var deBruijn32tab = [32]byte{
    10  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    11  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
    12  }
    13  
    14  const deBruijn32 = 0x077CB531
    15  
    16  var deBruijn64tab = [64]byte{
    17  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
    18  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
    19  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
    20  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
    21  }
    22  
    23  const deBruijn64 = 0x03f79d71b4ca8b09
    24  
    25  const ntz8tab = "" +
    26  	"\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    27  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    28  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    29  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    30  	"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    31  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    32  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    33  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    34  	"\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    35  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    36  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    37  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    38  	"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    39  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    40  	"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
    41  	"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
    42  
    43  // TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
    44  func TrailingZeros32(x uint32) int {
    45  	if x == 0 {
    46  		return 32
    47  	}
    48  	// see comment in TrailingZeros64
    49  	return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
    50  }
    51  
    52  // TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
    53  func TrailingZeros64(x uint64) int {
    54  	if x == 0 {
    55  		return 64
    56  	}
    57  	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
    58  	//
    59  	// x & -x leaves only the right-most bit set in the word. Let k be the
    60  	// index of that bit. Since only a single bit is set, the value is two
    61  	// to the power of k. Multiplying by a power of two is equivalent to
    62  	// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
    63  	// is such that all six bit, consecutive substrings are distinct.
    64  	// Therefore, if we have a left shifted version of this constant we can
    65  	// find by how many bits it was shifted by looking at which six bit
    66  	// substring ended up at the top of the word.
    67  	// (Knuth, volume 4, section 7.3.1)
    68  	return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
    69  }
    70  
    71  // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
    72  func TrailingZeros8(x uint8) int {
    73  	return int(ntz8tab[x])
    74  }
    75  
    76  const len8tab = "" +
    77  	"\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
    78  	"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
    79  	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
    80  	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
    81  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
    82  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
    83  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
    84  	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
    85  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    86  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    87  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    88  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    89  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    90  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    91  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
    92  	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
    93  
    94  // Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
    95  //
    96  // nosplit because this is used in src/runtime/histogram.go, which make run in sensitive contexts.
    97  //
    98  //go:nosplit
    99  func Len64(x uint64) (n int) {
   100  	if x >= 1<<32 {
   101  		x >>= 32
   102  		n = 32
   103  	}
   104  	if x >= 1<<16 {
   105  		x >>= 16
   106  		n += 16
   107  	}
   108  	if x >= 1<<8 {
   109  		x >>= 8
   110  		n += 8
   111  	}
   112  	return n + int(len8tab[x])
   113  }
   114  
   115  // --- OnesCount ---
   116  
   117  const m0 = 0x5555555555555555 // 01010101 ...
   118  const m1 = 0x3333333333333333 // 00110011 ...
   119  const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
   120  
   121  // OnesCount64 returns the number of one bits ("population count") in x.
   122  func OnesCount64(x uint64) int {
   123  	// Implementation: Parallel summing of adjacent bits.
   124  	// See "Hacker's Delight", Chap. 5: Counting Bits.
   125  	// The following pattern shows the general approach:
   126  	//
   127  	//   x = x>>1&(m0&m) + x&(m0&m)
   128  	//   x = x>>2&(m1&m) + x&(m1&m)
   129  	//   x = x>>4&(m2&m) + x&(m2&m)
   130  	//   x = x>>8&(m3&m) + x&(m3&m)
   131  	//   x = x>>16&(m4&m) + x&(m4&m)
   132  	//   x = x>>32&(m5&m) + x&(m5&m)
   133  	//   return int(x)
   134  	//
   135  	// Masking (& operations) can be left away when there's no
   136  	// danger that a field's sum will carry over into the next
   137  	// field: Since the result cannot be > 64, 8 bits is enough
   138  	// and we can ignore the masks for the shifts by 8 and up.
   139  	// Per "Hacker's Delight", the first line can be simplified
   140  	// more, but it saves at best one instruction, so we leave
   141  	// it alone for clarity.
   142  	const m = 1<<64 - 1
   143  	x = x>>1&(m0&m) + x&(m0&m)
   144  	x = x>>2&(m1&m) + x&(m1&m)
   145  	x = (x>>4 + x) & (m2 & m)
   146  	x += x >> 8
   147  	x += x >> 16
   148  	x += x >> 32
   149  	return int(x) & (1<<7 - 1)
   150  }
   151  
   152  // LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
   153  func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
   154  
   155  // LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
   156  func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
   157  
   158  // Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
   159  func Len8(x uint8) int {
   160  	return int(len8tab[x])
   161  }
   162  
   163  // Bswap64 returns its input with byte order reversed
   164  // 0x0102030405060708 -> 0x0807060504030201
   165  func Bswap64(x uint64) uint64 {
   166  	c8 := uint64(0x00ff00ff00ff00ff)
   167  	a := x >> 8 & c8
   168  	b := (x & c8) << 8
   169  	x = a | b
   170  	c16 := uint64(0x0000ffff0000ffff)
   171  	a = x >> 16 & c16
   172  	b = (x & c16) << 16
   173  	x = a | b
   174  	c32 := uint64(0x00000000ffffffff)
   175  	a = x >> 32 & c32
   176  	b = (x & c32) << 32
   177  	x = a | b
   178  	return x
   179  }
   180  
   181  // Bswap32 returns its input with byte order reversed
   182  // 0x01020304 -> 0x04030201
   183  func Bswap32(x uint32) uint32 {
   184  	c8 := uint32(0x00ff00ff)
   185  	a := x >> 8 & c8
   186  	b := (x & c8) << 8
   187  	x = a | b
   188  	c16 := uint32(0x0000ffff)
   189  	a = x >> 16 & c16
   190  	b = (x & c16) << 16
   191  	x = a | b
   192  	return x
   193  }
   194  
   195  // Prefetch prefetches data from memory addr to cache
   196  //
   197  // AMD64: Produce PREFETCHT0 instruction
   198  //
   199  // ARM64: Produce PRFM instruction with PLDL1KEEP option
   200  func Prefetch(addr uintptr) {}
   201  
   202  // PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
   203  // That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
   204  //
   205  // AMD64: Produce PREFETCHNTA instruction
   206  //
   207  // ARM64: Produce PRFM instruction with PLDL1STRM option
   208  func PrefetchStreamed(addr uintptr) {}
   209  

View as plain text