...

Source file src/runtime/mbitmap.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Garbage collector: type and heap bitmaps.
     6	//
     7	// Stack, data, and bss bitmaps
     8	//
     9	// Stack frames and global variables in the data and bss sections are described
    10	// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
    11	// to be visited during GC. The bits in each byte are consumed starting with
    12	// the low bit: 1<<0, 1<<1, and so on.
    13	//
    14	// Heap bitmap
    15	//
    16	// The allocated heap comes from a subset of the memory in the range [start, used),
    17	// where start == mheap_.arena_start and used == mheap_.arena_used.
    18	// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
    19	// stored in bytes indexed backward in memory from start.
    20	// That is, the byte at address start-1 holds the 2-bit entries for the four words
    21	// start through start+3*ptrSize, the byte at start-2 holds the entries for
    22	// start+4*ptrSize through start+7*ptrSize, and so on.
    23	//
    24	// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
    25	// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
    26	// The meaning of the high bit depends on the position of the word being described
    27	// in its allocated object. In all words *except* the second word, the
    28	// high bit indicates that the object is still being described. In
    29	// these words, if a bit pair with a high bit 0 is encountered, the
    30	// low bit can also be assumed to be 0, and the object description is
    31	// over. This 00 is called the ``dead'' encoding: it signals that the
    32	// rest of the words in the object are uninteresting to the garbage
    33	// collector.
    34	//
    35	// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
    36	//
    37	// The 2-bit entries are split when written into the byte, so that the top half
    38	// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
    39	// bits.
    40	// This form allows a copy from the 1-bit to the 4-bit form to keep the
    41	// pointer bits contiguous, instead of having to space them out.
    42	//
    43	// The code makes use of the fact that the zero value for a heap bitmap
     44	// has no live pointer bit set and is (depending on position) not used,
     45	// not checkmarked, and the dead encoding.
    46	// These properties must be preserved when modifying the encoding.
    47	//
    48	// Checkmarks
    49	//
    50	// In a concurrent garbage collector, one worries about failing to mark
    51	// a live object due to mutations without write barriers or bugs in the
    52	// collector implementation. As a sanity check, the GC has a 'checkmark'
    53	// mode that retraverses the object graph with the world stopped, to make
    54	// sure that everything that should be marked is marked.
    55	// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
    56	// for the second word of the object holds the checkmark bit.
    57	// When not in checkmark mode, this bit is set to 1.
    58	//
    59	// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
    60	// means every allocated object has two words, so there is room for the
    61	// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
    62	// just one word, so the second bit pair is not available for encoding the
    63	// checkmark. However, because non-pointer allocations are combined
    64	// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
    65	// must be a pointer, so the type bit in the first word is not actually needed.
     66	// It is still used in general, except that in checkmark mode the type bit is
     67	// repurposed as the checkmark bit and then reinitialized (to 1) as the type
     68	// bit when checkmarking is finished.
    69	//
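      	//
      	// For example (illustrative, 64-bit): the bitmap byte 0x31 (binary 00110001)
      	// describes four words of which only the first is a pointer. The low nibble
      	// 0001 holds the four pointer bits and the high nibble 0011 the four high
      	// bits: word 0 is a live pointer whose description continues, word 1's high
      	// bit is the checkmark (1 outside checkmark mode), and words 2 and 3 are
      	// the dead encoding.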
    70	
    71	package runtime
    72	
    73	import (
    74		"runtime/internal/atomic"
    75		"runtime/internal/sys"
    76		"unsafe"
    77	)
    78	
    79	const (
    80		bitPointer = 1 << 0
    81		bitScan    = 1 << 4
    82	
    83		heapBitsShift   = 1                     // shift offset between successive bitPointer or bitScan entries
    84		heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
    85	
    86		// all scan/pointer bits in a byte
    87		bitScanAll    = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
    88		bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
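      	
      	// Numerically, bitPointerAll = 0x0f and bitScanAll = 0xf0: the low nibble
      	// of each bitmap byte holds its four pointer bits and the high nibble its
      	// four scan bits. On 64-bit, heapBitmapScale = 8*4 = 32.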
    89	)
    90	
    91	// addb returns the byte pointer p+n.
    92	//go:nowritebarrier
    93	//go:nosplit
    94	func addb(p *byte, n uintptr) *byte {
    95		// Note: wrote out full expression instead of calling add(p, n)
    96		// to reduce the number of temporaries generated by the
    97		// compiler for this trivial expression during inlining.
    98		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
    99	}
   100	
   101	// subtractb returns the byte pointer p-n.
   102	// subtractb is typically used when traversing the pointer tables referred to by hbits
   103	// which are arranged in reverse order.
   104	//go:nowritebarrier
   105	//go:nosplit
   106	func subtractb(p *byte, n uintptr) *byte {
   107		// Note: wrote out full expression instead of calling add(p, -n)
   108		// to reduce the number of temporaries generated by the
   109		// compiler for this trivial expression during inlining.
   110		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
   111	}
   112	
   113	// add1 returns the byte pointer p+1.
   114	//go:nowritebarrier
   115	//go:nosplit
   116	func add1(p *byte) *byte {
   117		// Note: wrote out full expression instead of calling addb(p, 1)
   118		// to reduce the number of temporaries generated by the
   119		// compiler for this trivial expression during inlining.
   120		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
   121	}
   122	
   123	// subtract1 returns the byte pointer p-1.
   124	// subtract1 is typically used when traversing the pointer tables referred to by hbits
   125	// which are arranged in reverse order.
   126	//go:nowritebarrier
   127	//
   128	// nosplit because it is used during write barriers and must not be preempted.
   129	//go:nosplit
   130	func subtract1(p *byte) *byte {
   131		// Note: wrote out full expression instead of calling subtractb(p, 1)
   132		// to reduce the number of temporaries generated by the
   133		// compiler for this trivial expression during inlining.
   134		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
   135	}
   136	
    137	// mapBits is called each time arena_used is extended.
   138	// It maps any additional bitmap memory needed for the new arena memory.
   139	// It must be called with the expected new value of arena_used,
   140	// *before* h.arena_used has been updated.
   141	// Waiting to update arena_used until after the memory has been mapped
    142	// avoids faults when other threads try to access the bitmap immediately
   143	// after observing the change to arena_used.
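      	//
      	// For example (illustrative numbers): an arena of 1<<20 bytes on a 64-bit
      	// system needs (1<<20)/heapBitmapScale = 32768 bytes of bitmap; after
      	// rounding up to bitmapChunk and physPageSize, n = 32768 and the
      	// not-yet-mapped tail [h.bitmap-n, h.bitmap-h.bitmap_mapped) is mapped
      	// with sysMap.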
   144	//
   145	//go:nowritebarrier
   146	func (h *mheap) mapBits(arena_used uintptr) {
   147		// Caller has added extra mappings to the arena.
   148		// Add extra mappings of bitmap words as needed.
   149		// We allocate extra bitmap pieces in chunks of bitmapChunk.
   150		const bitmapChunk = 8192
   151	
   152		n := (arena_used - mheap_.arena_start) / heapBitmapScale
   153		n = round(n, bitmapChunk)
   154		n = round(n, physPageSize)
   155		if h.bitmap_mapped >= n {
   156			return
   157		}
   158	
   159		sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
   160		h.bitmap_mapped = n
   161	}
   162	
   163	// heapBits provides access to the bitmap bits for a single heap word.
   164	// The methods on heapBits take value receivers so that the compiler
   165	// can more easily inline calls to those methods and registerize the
   166	// struct fields independently.
   167	type heapBits struct {
   168		bitp  *uint8
   169		shift uint32
   170	}
   171	
   172	// markBits provides access to the mark bit for an object in the heap.
   173	// bytep points to the byte holding the mark bit.
   174	// mask is a byte with a single bit set that can be &ed with *bytep
   175	// to see if the bit has been set.
    176	// *m.bytep&m.mask != 0 indicates the mark bit is set.
   177	// index can be used along with span information to generate
   178	// the address of the object in the heap.
   179	// We maintain one set of mark bits for allocation and one for
   180	// marking purposes.
   181	type markBits struct {
   182		bytep *uint8
   183		mask  uint8
   184		index uintptr
   185	}
   186	
   187	//go:nosplit
   188	func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
   189		whichByte := allocBitIndex / 8
   190		whichBit := allocBitIndex % 8
   191		bytePtr := addb(s.allocBits, whichByte)
   192		return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
   193	}
   194	
    195	// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
   196	// and negates them so that ctz (count trailing zeros) instructions
   197	// can be used. It then places these 8 bytes into the cached 64 bit
   198	// s.allocCache.
   199	func (s *mspan) refillAllocCache(whichByte uintptr) {
   200		bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte)))
   201		aCache := uint64(0)
   202		aCache |= uint64(bytes[0])
   203		aCache |= uint64(bytes[1]) << (1 * 8)
   204		aCache |= uint64(bytes[2]) << (2 * 8)
   205		aCache |= uint64(bytes[3]) << (3 * 8)
   206		aCache |= uint64(bytes[4]) << (4 * 8)
   207		aCache |= uint64(bytes[5]) << (5 * 8)
   208		aCache |= uint64(bytes[6]) << (6 * 8)
   209		aCache |= uint64(bytes[7]) << (7 * 8)
   210		s.allocCache = ^aCache
   211	}
   212	
   213	// nextFreeIndex returns the index of the next free object in s at
   214	// or after s.freeindex.
   215	// There are hardware instructions that can be used to make this
   216	// faster if profiling warrants it.
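      	//
      	// For example (illustrative): if s.freeindex = 0 and the low byte of
      	// s.allocBits is 0x19 (objects 0, 3, and 4 allocated), s.allocCache holds
      	// the complement, so Ctz64 returns 1: index 1 is returned, s.allocCache is
      	// shifted right by 2, and s.freeindex becomes 2.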
   217	func (s *mspan) nextFreeIndex() uintptr {
   218		sfreeindex := s.freeindex
   219		snelems := s.nelems
   220		if sfreeindex == snelems {
   221			return sfreeindex
   222		}
   223		if sfreeindex > snelems {
   224			throw("s.freeindex > s.nelems")
   225		}
   226	
   227		aCache := s.allocCache
   228	
   229		bitIndex := sys.Ctz64(aCache)
   230		for bitIndex == 64 {
   231			// Move index to start of next cached bits.
   232			sfreeindex = (sfreeindex + 64) &^ (64 - 1)
   233			if sfreeindex >= snelems {
   234				s.freeindex = snelems
   235				return snelems
   236			}
   237			whichByte := sfreeindex / 8
   238			// Refill s.allocCache with the next 64 alloc bits.
   239			s.refillAllocCache(whichByte)
   240			aCache = s.allocCache
   241			bitIndex = sys.Ctz64(aCache)
   242			// nothing available in cached bits
   243			// grab the next 8 bytes and try again.
   244		}
   245		result := sfreeindex + uintptr(bitIndex)
   246		if result >= snelems {
   247			s.freeindex = snelems
   248			return snelems
   249		}
   250	
   251		s.allocCache >>= (bitIndex + 1)
   252		sfreeindex = result + 1
   253	
   254		if sfreeindex%64 == 0 && sfreeindex != snelems {
   255			// We just incremented s.freeindex so it isn't 0.
   256			// As each 1 in s.allocCache was encountered and used for allocation
   257			// it was shifted away. At this point s.allocCache contains all 0s.
   258			// Refill s.allocCache so that it corresponds
   259			// to the bits at s.allocBits starting at s.freeindex.
   260			whichByte := sfreeindex / 8
   261			s.refillAllocCache(whichByte)
   262		}
   263		s.freeindex = sfreeindex
   264		return result
   265	}
   266	
   267	// isFree returns whether the index'th object in s is unallocated.
   268	func (s *mspan) isFree(index uintptr) bool {
   269		if index < s.freeindex {
   270			return false
   271		}
   272		whichByte := index / 8
   273		whichBit := index % 8
   274		byteVal := *addb(s.allocBits, whichByte)
   275		return byteVal&uint8(1<<whichBit) == 0
   276	}
   277	
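      	// objIndex returns the index of the object in s that contains the address p.
      	// For power-of-2 element sizes (s.baseMask != 0) this is a shift by
      	// s.divShift; otherwise the byte offset is divided by s.elemsize using the
      	// precomputed magic numbers s.divMul, s.divShift, and s.divShift2, so that,
      	// e.g. (illustrative), offset 96 in a span of 48-byte objects yields index 2.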
   278	func (s *mspan) objIndex(p uintptr) uintptr {
   279		byteOffset := p - s.base()
   280		if byteOffset == 0 {
   281			return 0
   282		}
   283		if s.baseMask != 0 {
    284	// s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift
   285			return byteOffset >> s.divShift
   286		}
   287		return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
   288	}
   289	
   290	func markBitsForAddr(p uintptr) markBits {
   291		s := spanOf(p)
   292		objIndex := s.objIndex(p)
   293		return s.markBitsForIndex(objIndex)
   294	}
   295	
   296	func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
   297		whichByte := objIndex / 8
   298		bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
   299		bytePtr := addb(s.gcmarkBits, whichByte)
   300		return markBits{bytePtr, bitMask, objIndex}
   301	}
   302	
   303	func (s *mspan) markBitsForBase() markBits {
   304		return markBits{s.gcmarkBits, uint8(1), 0}
   305	}
   306	
   307	// isMarked reports whether mark bit m is set.
   308	func (m markBits) isMarked() bool {
   309		return *m.bytep&m.mask != 0
   310	}
   311	
   312	// setMarked sets the marked bit in the markbits, atomically. Some compilers
   313	// are not able to inline atomic.Or8 function so if it appears as a hot spot consider
   314	// inlining it manually.
   315	func (m markBits) setMarked() {
   316		// Might be racing with other updates, so use atomic update always.
   317		// We used to be clever here and use a non-atomic update in certain
   318		// cases, but it's not worth the risk.
   319		atomic.Or8(m.bytep, m.mask)
   320	}
   321	
   322	// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
   323	func (m markBits) setMarkedNonAtomic() {
   324		*m.bytep |= m.mask
   325	}
   326	
   327	// clearMarked clears the marked bit in the markbits, atomically.
   328	func (m markBits) clearMarked() {
   329		// Might be racing with other updates, so use atomic update always.
   330		// We used to be clever here and use a non-atomic update in certain
   331		// cases, but it's not worth the risk.
   332		atomic.And8(m.bytep, ^m.mask)
   333	}
   334	
   335	// clearMarkedNonAtomic clears the marked bit non-atomically.
   336	func (m markBits) clearMarkedNonAtomic() {
    337		*m.bytep &^= m.mask
   338	}
   339	
   340	// markBitsForSpan returns the markBits for the span base address base.
   341	func markBitsForSpan(base uintptr) (mbits markBits) {
   342		if base < mheap_.arena_start || base >= mheap_.arena_used {
   343			throw("markBitsForSpan: base out of range")
   344		}
   345		mbits = markBitsForAddr(base)
   346		if mbits.mask != 1 {
   347			throw("markBitsForSpan: unaligned start")
   348		}
   349		return mbits
   350	}
   351	
   352	// advance advances the markBits to the next object in the span.
   353	func (m *markBits) advance() {
   354		if m.mask == 1<<7 {
   355			m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
   356			m.mask = 1
   357		} else {
   358			m.mask = m.mask << 1
   359		}
   360		m.index++
   361	}
   362	
   363	// heapBitsForAddr returns the heapBits for the address addr.
   364	// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
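      	//
      	// For example (illustrative): for addr = mheap_.arena_start + 4*sys.PtrSize,
      	// off = 4, so the entry lives in the byte at mheap_.bitmap-2 with shift 0,
      	// and the entry for the following word is in the same byte at shift 1.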
   365	//
   366	// nosplit because it is used during write barriers and must not be preempted.
   367	//go:nosplit
   368	func heapBitsForAddr(addr uintptr) heapBits {
    369	// 2 bits per word, 4 pairs per byte, and the low-bits mask (off & 3) is hard coded.
   370		off := (addr - mheap_.arena_start) / sys.PtrSize
   371		return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)}
   372	}
   373	
   374	// heapBitsForSpan returns the heapBits for the span base address base.
   375	func heapBitsForSpan(base uintptr) (hbits heapBits) {
   376		if base < mheap_.arena_start || base >= mheap_.arena_used {
   377			throw("heapBitsForSpan: base out of range")
   378		}
   379		return heapBitsForAddr(base)
   380	}
   381	
    382	// heapBitsForObject returns the base address for the heap object
    383	// containing the address p, the heapBits for base,
    384	// the object's span, and the index of the object in s.
    385	// If p does not point into a heap object,
    386	// it returns base == 0;
    387	// otherwise it returns the base of the object.
   388	//
   389	// refBase and refOff optionally give the base address of the object
   390	// in which the pointer p was found and the byte offset at which it
   391	// was found. These are used for error reporting.
   392	func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) {
   393		arenaStart := mheap_.arena_start
   394		if p < arenaStart || p >= mheap_.arena_used {
   395			return
   396		}
   397		off := p - arenaStart
   398		idx := off >> _PageShift
   399		// p points into the heap, but possibly to the middle of an object.
   400		// Consult the span table to find the block beginning.
   401		s = mheap_.spans[idx]
   402		if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
   403			if s == nil || s.state == _MSpanStack {
   404				// If s is nil, the virtual address has never been part of the heap.
   405				// This pointer may be to some mmap'd region, so we allow it.
   406				// Pointers into stacks are also ok, the runtime manages these explicitly.
   407				return
   408			}
   409	
   410			// The following ensures that we are rigorous about what data
   411			// structures hold valid pointers.
   412			if debug.invalidptr != 0 {
   413				// Typically this indicates an incorrect use
   414				// of unsafe or cgo to store a bad pointer in
   415				// the Go heap. It may also indicate a runtime
   416				// bug.
   417				//
   418				// TODO(austin): We could be more aggressive
   419				// and detect pointers to unallocated objects
   420				// in allocated spans.
   421				printlock()
   422				print("runtime: pointer ", hex(p))
   423				if s.state != mSpanInUse {
   424					print(" to unallocated span")
   425				} else {
   426					print(" to unused region of span")
   427				}
   428				print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
   429				if refBase != 0 {
   430					print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
   431					gcDumpObject("object", refBase, refOff)
   432				}
   433				throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
   434			}
   435			return
   436		}
    437	// If this span holds objects of a power-of-2 size, just mask off the low bits
    438	// that point within the object to find the base. Otherwise use the size to get the base.
   439		if s.baseMask != 0 {
   440			// optimize for power of 2 sized objects.
   441			base = s.base()
   442			base = base + (p-base)&uintptr(s.baseMask)
   443			objIndex = (base - s.base()) >> s.divShift
   444			// base = p & s.baseMask is faster for small spans,
   445			// but doesn't work for large spans.
   446			// Overall, it's faster to use the more general computation above.
   447		} else {
   448			base = s.base()
   449			if p-base >= s.elemsize {
   450				// n := (p - base) / s.elemsize, using division by multiplication
   451				objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2
   452				base += objIndex * s.elemsize
   453			}
   454		}
   455		// Now that we know the actual base, compute heapBits to return to caller.
   456		hbits = heapBitsForAddr(base)
   457		return
   458	}
   459	
    460	// prefetch prefetches the bitmap byte containing the bits for h.
   461	func (h heapBits) prefetch() {
   462		prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
   463	}
   464	
   465	// next returns the heapBits describing the next pointer-sized word in memory.
   466	// That is, if h describes address p, h.next() describes p+ptrSize.
   467	// Note that next does not modify h. The caller must record the result.
   468	//
   469	// nosplit because it is used during write barriers and must not be preempted.
   470	//go:nosplit
   471	func (h heapBits) next() heapBits {
   472		if h.shift < 3*heapBitsShift {
   473			return heapBits{h.bitp, h.shift + heapBitsShift}
   474		}
   475		return heapBits{subtract1(h.bitp), 0}
   476	}
   477	
   478	// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
   479	// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
   480	// h.forward(1) is equivalent to h.next(), just slower.
   481	// Note that forward does not modify h. The caller must record the result.
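      	//
      	// For example (illustrative): from shift 2, h.forward(5) computes n = 5+2 = 7,
      	// landing one byte back in memory (subtractb(h.bitp, 1)) at shift 3, exactly
      	// where five successive next() calls would arrive.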
   483	func (h heapBits) forward(n uintptr) heapBits {
   484		n += uintptr(h.shift) / heapBitsShift
   485		return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
   486	}
   487	
    488	// bits returns the heap bits for the current word.
    489	// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer;
    490	// the result's higher bits hold the entries for subsequent words described by the same bitmap byte.
   491	func (h heapBits) bits() uint32 {
   492		// The (shift & 31) eliminates a test and conditional branch
   493		// from the generated code.
   494		return uint32(*h.bitp) >> (h.shift & 31)
   495	}
   496	
    497	// morePointers reports whether this word and the remaining words in this object
    498	// may contain pointers; a false result means the rest of the object is the
    499	// dead (all scalar) encoding. h must not describe the second word of the object.
   500	func (h heapBits) morePointers() bool {
   501		return h.bits()&bitScan != 0
   502	}
   503	
   504	// isPointer reports whether the heap bits describe a pointer word.
   505	//
   506	// nosplit because it is used during write barriers and must not be preempted.
   507	//go:nosplit
   508	func (h heapBits) isPointer() bool {
   509		return h.bits()&bitPointer != 0
   510	}
   511	
   512	// hasPointers reports whether the given object has any pointers.
   513	// It must be told how large the object at h is for efficiency.
   514	// h must describe the initial word of the object.
   515	func (h heapBits) hasPointers(size uintptr) bool {
   516		if size == sys.PtrSize { // 1-word objects are always pointers
   517			return true
   518		}
   519		return (*h.bitp>>h.shift)&bitScan != 0
   520	}
   521	
   522	// isCheckmarked reports whether the heap bits have the checkmarked bit set.
   523	// It must be told how large the object at h is, because the encoding of the
   524	// checkmark bit varies by size.
   525	// h must describe the initial word of the object.
   526	func (h heapBits) isCheckmarked(size uintptr) bool {
   527		if size == sys.PtrSize {
   528			return (*h.bitp>>h.shift)&bitPointer != 0
   529		}
   530		// All multiword objects are 2-word aligned,
   531		// so we know that the initial word's 2-bit pair
   532		// and the second word's 2-bit pair are in the
   533		// same heap bitmap byte, *h.bitp.
   534		return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0
   535	}
   536	
   537	// setCheckmarked sets the checkmarked bit.
   538	// It must be told how large the object at h is, because the encoding of the
   539	// checkmark bit varies by size.
   540	// h must describe the initial word of the object.
   541	func (h heapBits) setCheckmarked(size uintptr) {
   542		if size == sys.PtrSize {
   543			atomic.Or8(h.bitp, bitPointer<<h.shift)
   544			return
   545		}
   546		atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
   547	}
   548	
   549	// bulkBarrierPreWrite executes writebarrierptr_prewrite1
   550	// for every pointer slot in the memory range [src, src+size),
   551	// using pointer/scalar information from [dst, dst+size).
   552	// This executes the write barriers necessary before a memmove.
   553	// src, dst, and size must be pointer-aligned.
   554	// The range [dst, dst+size) must lie within a single object.
   555	//
   556	// As a special case, src == 0 indicates that this is being used for a
   557	// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
   558	// barrier.
   559	//
   560	// Callers should call bulkBarrierPreWrite immediately before
   561	// calling memmove(dst, src, size). This function is marked nosplit
   562	// to avoid being preempted; the GC must not stop the goroutine
   563	// between the memmove and the execution of the barriers.
   564	// The caller is also responsible for cgo pointer checks if this
   565	// may be writing Go pointers into non-Go memory.
   566	//
   567	// The pointer bitmap is not maintained for allocations containing
   568	// no pointers at all; any caller of bulkBarrierPreWrite must first
   569	// make sure the underlying allocation contains pointers, usually
   570	// by checking typ.kind&kindNoPointers.
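      	//
      	// A typical caller follows this pattern (a sketch of the typedmemmove
      	// pattern, with typ describing both dst and src):
      	//
      	//	if typ.kind&kindNoPointers == 0 {
      	//		bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size)
      	//	}
      	//	memmove(dst, src, typ.size)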
   571	//
   572	//go:nosplit
   573	func bulkBarrierPreWrite(dst, src, size uintptr) {
   574		if (dst|src|size)&(sys.PtrSize-1) != 0 {
   575			throw("bulkBarrierPreWrite: unaligned arguments")
   576		}
   577		if !writeBarrier.needed {
   578			return
   579		}
   580		if !inheap(dst) {
   581			// If dst is on the stack and in a higher frame than the
   582			// caller, we either need to execute write barriers on
   583			// it (which is what happens for normal stack writes
   584			// through pointers to higher frames), or we need to
   585			// force the mark termination stack scan to scan the
   586			// frame containing dst.
   587			//
   588			// Executing write barriers on dst is complicated in the
   589			// general case because we either need to unwind the
   590			// stack to get the stack map, or we need the type's
   591			// bitmap, which may be a GC program.
   592			//
   593			// Hence, we opt for forcing the re-scan to scan the
   594			// frame containing dst, which we can do by simply
   595			// unwinding the stack barriers between the current SP
   596			// and dst's frame.
   597			gp := getg().m.curg
   598			if gp != nil && gp.stack.lo <= dst && dst < gp.stack.hi {
   599				// Run on the system stack to give it more
   600				// stack space.
   601				systemstack(func() {
   602					gcUnwindBarriers(gp, dst)
   603				})
   604				return
   605			}
   606	
   607			// If dst is a global, use the data or BSS bitmaps to
   608			// execute write barriers.
   609			for _, datap := range activeModules() {
   610				if datap.data <= dst && dst < datap.edata {
   611					bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
   612					return
   613				}
   614			}
   615			for _, datap := range activeModules() {
   616				if datap.bss <= dst && dst < datap.ebss {
   617					bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
   618					return
   619				}
   620			}
   621			return
   622		}
   623	
   624		h := heapBitsForAddr(dst)
   625		if src == 0 {
   626			for i := uintptr(0); i < size; i += sys.PtrSize {
   627				if h.isPointer() {
   628					dstx := (*uintptr)(unsafe.Pointer(dst + i))
   629					writebarrierptr_prewrite1(dstx, 0)
   630				}
   631				h = h.next()
   632			}
   633		} else {
   634			for i := uintptr(0); i < size; i += sys.PtrSize {
   635				if h.isPointer() {
   636					dstx := (*uintptr)(unsafe.Pointer(dst + i))
   637					srcx := (*uintptr)(unsafe.Pointer(src + i))
   638					writebarrierptr_prewrite1(dstx, *srcx)
   639				}
   640				h = h.next()
   641			}
   642		}
   643	}
   644	
   645	// bulkBarrierBitmap executes write barriers for copying from [src,
   646	// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
   647	// assumed to start maskOffset bytes into the data covered by the
   648	// bitmap in bits (which may not be a multiple of 8).
   649	//
   650	// This is used by bulkBarrierPreWrite for writes to data and BSS.
   651	//
   652	//go:nosplit
   653	func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
   654		word := maskOffset / sys.PtrSize
   655		bits = addb(bits, word/8)
   656		mask := uint8(1) << (word % 8)
   657	
   658		for i := uintptr(0); i < size; i += sys.PtrSize {
   659			if mask == 0 {
   660				bits = addb(bits, 1)
   661				if *bits == 0 {
   662					// Skip 8 words.
   663					i += 7 * sys.PtrSize
   664					continue
   665				}
   666				mask = 1
   667			}
   668			if *bits&mask != 0 {
   669				dstx := (*uintptr)(unsafe.Pointer(dst + i))
   670				if src == 0 {
   671					writebarrierptr_prewrite1(dstx, 0)
   672				} else {
   673					srcx := (*uintptr)(unsafe.Pointer(src + i))
   674					writebarrierptr_prewrite1(dstx, *srcx)
   675				}
   676			}
   677			mask <<= 1
   678		}
   679	}
   680	
   681	// typeBitsBulkBarrier executes writebarrierptr_prewrite for every
   682	// pointer that would be copied from [src, src+size) to [dst,
   683	// dst+size) by a memmove using the type bitmap to locate those
   684	// pointer slots.
   685	//
   686	// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
   687	// dst, src, and size must be pointer-aligned.
   688	// The type typ must have a plain bitmap, not a GC program.
   689	// The only use of this function is in channel sends, and the
   690	// 64 kB channel element limit takes care of this for us.
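      	//
      	// The channel send path uses it roughly as follows (a sketch of sendDirect):
      	//
      	//	typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size)
      	//	memmove(dst, src, t.size)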
   691	//
    692	// Must not be preempted because it typically runs right before memmove,
    693	// and the GC must observe the barriers and the memmove as a single atomic action.
   694	//
   695	//go:nosplit
   696	func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
   697		if typ == nil {
   698			throw("runtime: typeBitsBulkBarrier without type")
   699		}
   700		if typ.size != size {
   701			println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
   702			throw("runtime: invalid typeBitsBulkBarrier")
   703		}
   704		if typ.kind&kindGCProg != 0 {
   705			println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
   706			throw("runtime: invalid typeBitsBulkBarrier")
   707		}
   708		if !writeBarrier.needed {
   709			return
   710		}
   711		ptrmask := typ.gcdata
   712		var bits uint32
   713		for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
   714			if i&(sys.PtrSize*8-1) == 0 {
   715				bits = uint32(*ptrmask)
   716				ptrmask = addb(ptrmask, 1)
   717			} else {
   718				bits = bits >> 1
   719			}
   720			if bits&1 != 0 {
   721				dstx := (*uintptr)(unsafe.Pointer(dst + i))
   722				srcx := (*uintptr)(unsafe.Pointer(src + i))
   723				writebarrierptr_prewrite(dstx, *srcx)
   724			}
   725		}
   726	}
   727	
   728	// The methods operating on spans all require that h has been returned
   729	// by heapBitsForSpan and that size, n, total are the span layout description
   730	// returned by the mspan's layout method.
   731	// If total > size*n, it means that there is extra leftover memory in the span,
   732	// usually due to rounding.
   733	//
   734	// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
   735	
   736	// initSpan initializes the heap bitmap for a span.
   737	// It clears all checkmark bits.
   738	// If this is a span of pointer-sized objects, it initializes all
   739	// words to pointer/scan.
   740	// Otherwise, it initializes all words to scalar/dead.
   741	func (h heapBits) initSpan(s *mspan) {
   742		size, n, total := s.layout()
   743	
   744		// Init the markbit structures
   745		s.freeindex = 0
   746		s.allocCache = ^uint64(0) // all 1s indicating all free.
   747		s.nelems = n
   748		s.allocBits = nil
   749		s.gcmarkBits = nil
   750		s.gcmarkBits = newMarkBits(s.nelems)
   751		s.allocBits = newAllocBits(s.nelems)
   752	
   753		// Clear bits corresponding to objects.
   754		if total%heapBitmapScale != 0 {
   755			throw("initSpan: unaligned length")
   756		}
   757		nbyte := total / heapBitmapScale
   758		if sys.PtrSize == 8 && size == sys.PtrSize {
   759			end := h.bitp
   760			bitp := subtractb(end, nbyte-1)
   761			for {
   762				*bitp = bitPointerAll | bitScanAll
   763				if bitp == end {
   764					break
   765				}
   766				bitp = add1(bitp)
   767			}
   768			return
   769		}
   770		memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
   771	}
   772	
   773	// initCheckmarkSpan initializes a span for being checkmarked.
   774	// It clears the checkmark bits, which are set to 1 in normal operation.
   775	func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
    776	// On 32-bit, sys.PtrSize == 8 is a compile-time constant false, so this code is eliminated entirely.
   777		if sys.PtrSize == 8 && size == sys.PtrSize {
   778			// Checkmark bit is type bit, bottom bit of every 2-bit entry.
   779			// Only possible on 64-bit system, since minimum size is 8.
   780			// Must clear type bit (checkmark bit) of every word.
   781			// The type bit is the lower of every two-bit pair.
   782			bitp := h.bitp
   783			for i := uintptr(0); i < n; i += 4 {
   784				*bitp &^= bitPointerAll
   785				bitp = subtract1(bitp)
   786			}
   787			return
   788		}
   789		for i := uintptr(0); i < n; i++ {
   790			*h.bitp &^= bitScan << (heapBitsShift + h.shift)
   791			h = h.forward(size / sys.PtrSize)
   792		}
   793	}
   794	
   795	// clearCheckmarkSpan undoes all the checkmarking in a span.
   796	// The actual checkmark bits are ignored, so the only work to do
   797	// is to fix the pointer bits. (Pointer bits are ignored by scanobject
   798	// but consulted by typedmemmove.)
   799	func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
    800	// On 32-bit, sys.PtrSize == 8 is a compile-time constant false, so this code is eliminated entirely.
   801		if sys.PtrSize == 8 && size == sys.PtrSize {
   802			// Checkmark bit is type bit, bottom bit of every 2-bit entry.
   803			// Only possible on 64-bit system, since minimum size is 8.
   804			// Must clear type bit (checkmark bit) of every word.
   805			// The type bit is the lower of every two-bit pair.
   806			bitp := h.bitp
   807			for i := uintptr(0); i < n; i += 4 {
   808				*bitp |= bitPointerAll
   809				bitp = subtract1(bitp)
   810			}
   811		}
   812	}
   813	
    814	// oneBitCount is indexed by byte and produces the
    815	// number of 1 bits in that byte. For example 128 has 1 bit set
    816	// and oneBitCount[128] holds 1.
   817	var oneBitCount = [256]uint8{
   818		0, 1, 1, 2, 1, 2, 2, 3,
   819		1, 2, 2, 3, 2, 3, 3, 4,
   820		1, 2, 2, 3, 2, 3, 3, 4,
   821		2, 3, 3, 4, 3, 4, 4, 5,
   822		1, 2, 2, 3, 2, 3, 3, 4,
   823		2, 3, 3, 4, 3, 4, 4, 5,
   824		2, 3, 3, 4, 3, 4, 4, 5,
   825		3, 4, 4, 5, 4, 5, 5, 6,
   826		1, 2, 2, 3, 2, 3, 3, 4,
   827		2, 3, 3, 4, 3, 4, 4, 5,
   828		2, 3, 3, 4, 3, 4, 4, 5,
   829		3, 4, 4, 5, 4, 5, 5, 6,
   830		2, 3, 3, 4, 3, 4, 4, 5,
   831		3, 4, 4, 5, 4, 5, 5, 6,
   832		3, 4, 4, 5, 4, 5, 5, 6,
   833		4, 5, 5, 6, 5, 6, 6, 7,
   834		1, 2, 2, 3, 2, 3, 3, 4,
   835		2, 3, 3, 4, 3, 4, 4, 5,
   836		2, 3, 3, 4, 3, 4, 4, 5,
   837		3, 4, 4, 5, 4, 5, 5, 6,
   838		2, 3, 3, 4, 3, 4, 4, 5,
   839		3, 4, 4, 5, 4, 5, 5, 6,
   840		3, 4, 4, 5, 4, 5, 5, 6,
   841		4, 5, 5, 6, 5, 6, 6, 7,
   842		2, 3, 3, 4, 3, 4, 4, 5,
   843		3, 4, 4, 5, 4, 5, 5, 6,
   844		3, 4, 4, 5, 4, 5, 5, 6,
   845		4, 5, 5, 6, 5, 6, 6, 7,
   846		3, 4, 4, 5, 4, 5, 5, 6,
   847		4, 5, 5, 6, 5, 6, 6, 7,
   848		4, 5, 5, 6, 5, 6, 6, 7,
   849		5, 6, 6, 7, 6, 7, 7, 8}
   850	
   851	// countFree runs through the mark bits in a span and counts the number of free objects
   852	// in the span.
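      	//
      	// For example (illustrative): with s.nelems = 10 and gcmarkBits bytes
      	// 0xb1, 0x02, the full byte contributes oneBitCount[0xb1] = 4 marked
      	// objects, the final byte masked to its low 10%8 = 2 bits contributes
      	// oneBitCount[0x02&0x3] = 1, and countFree returns 10 - 5 = 5.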
    853	// TODO(rlh): Use popcount intrinsic.
   854	func (s *mspan) countFree() int {
   855		count := 0
   856		maxIndex := s.nelems / 8
   857		for i := uintptr(0); i < maxIndex; i++ {
   858			mrkBits := *addb(s.gcmarkBits, i)
   859			count += int(oneBitCount[mrkBits])
   860		}
   861		if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
   862			mrkBits := *addb(s.gcmarkBits, maxIndex)
   863			mask := uint8((1 << bitsInLastByte) - 1)
   864			bits := mrkBits & mask
   865			count += int(oneBitCount[bits])
   866		}
   867		return int(s.nelems) - count
   868	}
   869	
   870	// heapBitsSetType records that the new allocation [x, x+size)
   871	// holds in [x, x+dataSize) one or more values of type typ.
   872	// (The number of values is given by dataSize / typ.size.)
   873	// If dataSize < size, the fragment [x+dataSize, x+size) is
   874	// recorded as non-pointer data.
   875	// It is known that the type has pointers somewhere;
   876	// malloc does not call heapBitsSetType when there are no pointers,
   877	// because all free objects are marked as noscan during
   878	// heapBitsSweepSpan.
   879	//
   880	// There can only be one allocation from a given span active at a time,
   881	// and the bitmap for a span always falls on byte boundaries,
   882	// so there are no write-write races for access to the heap bitmap.
   883	// Hence, heapBitsSetType can access the bitmap without atomics.
   884	//
   885	// There can be read-write races between heapBitsSetType and things
   886	// that read the heap bitmap like scanobject. However, since
   887	// heapBitsSetType is only used for objects that have not yet been
   888	// made reachable, readers will ignore bits being modified by this
   889	// function. This does mean this function cannot transiently modify
   890	// bits that belong to neighboring objects. Also, on weakly-ordered
   891	// machines, callers must execute a store/store (publication) barrier
   892	// between calling this function and making the object reachable.
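      	//
      	// For example (illustrative, 64-bit, assuming h.shift == 0): a 40-byte type
      	// whose only pointer is its first word, allocated in a 48-byte size class,
      	// has ptrmask 0x01. Phase 1 gathers the bits and Phase 3 writes 0x11 for
      	// words 0-3 (pointer and scan bits for word 0 only; the checkmark slot for
      	// word 1 is left clear) and clears the entries for words 4 and 5 in the
      	// next, shared bitmap byte to the dead encoding.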
   893	func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
   894		const doubleCheck = false // slow but helpful; enable to test modifications to this code
   895	
    896	// size is always dataSize rounded up to the next malloc size class,
    897	// except in the case of allocating a defer block, in which case
    898	// typ.size is sizeof(_defer{}) (at least 6 words) and dataSize may be
    899	// arbitrarily larger.
   900		//
   901		// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
   902		// assume that dataSize == size without checking it explicitly.
   903	
   904		if sys.PtrSize == 8 && size == sys.PtrSize {
    905	// It's one word and it has pointers, so it must be a pointer.
   906			// Since all allocated one-word objects are pointers
   907			// (non-pointers are aggregated into tinySize allocations),
   908			// initSpan sets the pointer bits for us. Nothing to do here.
   909			if doubleCheck {
   910				h := heapBitsForAddr(x)
   911				if !h.isPointer() {
   912					throw("heapBitsSetType: pointer bit missing")
   913				}
   914				if !h.morePointers() {
   915					throw("heapBitsSetType: scan bit missing")
   916				}
   917			}
   918			return
   919		}
   920	
   921		h := heapBitsForAddr(x)
   922		ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
   923	
    924	// Heap bitmap bits for a 2-word object are only 4 bits,
    925	// so the bitmap byte is also shared with the objects next to it.
   926		// This is called out as a special case primarily for 32-bit systems,
   927		// so that on 32-bit systems the code below can assume all objects
   928		// are 4-word aligned (because they're all 16-byte aligned).
   929		if size == 2*sys.PtrSize {
   930			if typ.size == sys.PtrSize {
   931				// We're allocating a block big enough to hold two pointers.
   932				// On 64-bit, that means the actual object must be two pointers,
   933				// or else we'd have used the one-pointer-sized block.
   934				// On 32-bit, however, this is the 8-byte block, the smallest one.
   935				// So it could be that we're allocating one pointer and this was
   936				// just the smallest block available. Distinguish by checking dataSize.
   937				// (In general the number of instances of typ being allocated is
   938				// dataSize/typ.size.)
   939				if sys.PtrSize == 4 && dataSize == sys.PtrSize {
   940					// 1 pointer object. On 32-bit machines clear the bit for the
   941					// unused second word.
   942					*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
   943					*h.bitp |= (bitPointer | bitScan) << h.shift
   944				} else {
   945					// 2-element slice of pointer.
   946					*h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift
   947				}
   948				return
   949			}
   950			// Otherwise typ.size must be 2*sys.PtrSize,
   951			// and typ.kind&kindGCProg == 0.
   952			if doubleCheck {
   953				if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
   954					print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
   955					throw("heapBitsSetType")
   956				}
   957			}
   958			b := uint32(*ptrmask)
   959			hb := (b & 3) | bitScan
   960			// bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1.
    961	// 110011 is shifted left by h.shift and complemented.
    962	// This clears out the bits that are about to be
    963	// ored into *h.bitp in the next instructions.
   964			*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
   965			*h.bitp |= uint8(hb << h.shift)
   966			return
   967		}
   968	
   969		// Copy from 1-bit ptrmask into 2-bit bitmap.
   970		// The basic approach is to use a single uintptr as a bit buffer,
   971		// alternating between reloading the buffer and writing bitmap bytes.
   972		// In general, one load can supply two bitmap byte writes.
   973		// This is a lot of lines of code, but it compiles into relatively few
   974		// machine instructions.
   975	
   976		var (
   977			// Ptrmask input.
   978			p     *byte   // last ptrmask byte read
   979			b     uintptr // ptrmask bits already loaded
   980			nb    uintptr // number of bits in b at next read
   981			endp  *byte   // final ptrmask byte to read (then repeat)
   982			endnb uintptr // number of valid bits in *endp
   983			pbits uintptr // alternate source of bits
   984	
   985			// Heap bitmap output.
   986			w     uintptr // words processed
   987			nw    uintptr // number of words to process
   988			hbitp *byte   // next heap bitmap byte to write
   989			hb    uintptr // bits being prepared for *hbitp
   990		)
   991	
   992		hbitp = h.bitp
   993	
   994		// Handle GC program. Delayed until this part of the code
   995		// so that we can use the same double-checking mechanism
   996		// as the 1-bit case. Nothing above could have encountered
   997		// GC programs: the cases were all too small.
   998		if typ.kind&kindGCProg != 0 {
   999			heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
  1000			if doubleCheck {
  1001				// Double-check the heap bits written by GC program
  1002				// by running the GC program to create a 1-bit pointer mask
  1003				// and then jumping to the double-check code below.
  1004				// This doesn't catch bugs shared between the 1-bit and 4-bit
  1005				// GC program execution, but it does catch mistakes specific
  1006				// to just one of those and bugs in heapBitsSetTypeGCProg's
  1007				// implementation of arrays.
  1008				lock(&debugPtrmask.lock)
  1009				if debugPtrmask.data == nil {
  1010					debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
  1011				}
  1012				ptrmask = debugPtrmask.data
  1013				runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
  1014				goto Phase4
  1015			}
  1016			return
  1017		}
  1018	
  1019		// Note about sizes:
  1020		//
   1021	// typ.size is the number of bytes in the object,
   1022	// and typ.ptrdata is the number of bytes in the prefix
   1023	// of the object that contains pointers. That is, the final
   1024	// typ.size - typ.ptrdata bytes contain no pointers.
  1025		// This allows optimization of a common pattern where
  1026		// an object has a small header followed by a large scalar
  1027		// buffer. If we know the pointers are over, we don't have
  1028		// to scan the buffer's heap bitmap at all.
  1029		// The 1-bit ptrmasks are sized to contain only bits for
  1030		// the typ.ptrdata prefix, zero padded out to a full byte
  1031		// of bitmap. This code sets nw (below) so that heap bitmap
  1032		// bits are only written for the typ.ptrdata prefix; if there is
  1033		// more room in the allocated object, the next heap bitmap
  1034		// entry is a 00, indicating that there are no more pointers
  1035		// to scan. So only the ptrmask for the ptrdata bytes is needed.
  1036		//
  1037		// Replicated copies are not as nice: if there is an array of
   1038	// objects with scalar tails, all but the last tail do have to
  1039		// be initialized, because there is no way to say "skip forward".
  1040		// However, because of the possibility of a repeated type with
  1041		// size not a multiple of 4 pointers (one heap bitmap byte),
  1042		// the code already must handle the last ptrmask byte specially
  1043		// by treating it as containing only the bits for endnb pointers,
  1044		// where endnb <= 4. We represent large scalar tails that must
  1045		// be expanded in the replication by setting endnb larger than 4.
  1046		// This will have the effect of reading many bits out of b,
  1047		// but once the real bits are shifted out, b will supply as many
  1048		// zero bits as we try to read, which is exactly what we need.
  1049	
  1050		p = ptrmask
  1051		if typ.size < dataSize {
  1052			// Filling in bits for an array of typ.
  1053			// Set up for repetition of ptrmask during main loop.
   1054	// Note that ptrmask describes only a prefix of the object: the typ.ptrdata pointer-containing words.
  1055			const maxBits = sys.PtrSize*8 - 7
  1056			if typ.ptrdata/sys.PtrSize <= maxBits {
  1057				// Entire ptrmask fits in uintptr with room for a byte fragment.
  1058				// Load into pbits and never read from ptrmask again.
  1059				// This is especially important when the ptrmask has
  1060				// fewer than 8 bits in it; otherwise the reload in the middle
  1061				// of the Phase 2 loop would itself need to loop to gather
  1062				// at least 8 bits.
  1063	
  1064				// Accumulate ptrmask into b.
  1065				// ptrmask is sized to describe only typ.ptrdata, but we record
  1066				// it as describing typ.size bytes, since all the high bits are zero.
  1067				nb = typ.ptrdata / sys.PtrSize
  1068				for i := uintptr(0); i < nb; i += 8 {
  1069					b |= uintptr(*p) << i
  1070					p = add1(p)
  1071				}
  1072				nb = typ.size / sys.PtrSize
  1073	
  1074				// Replicate ptrmask to fill entire pbits uintptr.
  1075				// Doubling and truncating is fewer steps than
  1076				// iterating by nb each time. (nb could be 1.)
  1077				// Since we loaded typ.ptrdata/sys.PtrSize bits
  1078				// but are pretending to have typ.size/sys.PtrSize,
  1079				// there might be no replication necessary/possible.
  1080				pbits = b
  1081				endnb = nb
  1082				if nb+nb <= maxBits {
  1083					for endnb <= sys.PtrSize*8 {
  1084						pbits |= pbits << endnb
  1085						endnb += endnb
  1086					}
  1087					// Truncate to a multiple of original ptrmask.
  1088					endnb = maxBits / nb * nb
  1089					pbits &= 1<<endnb - 1
  1090					b = pbits
  1091					nb = endnb
  1092				}
  1093	
  1094				// Clear p and endp as sentinel for using pbits.
  1095				// Checked during Phase 2 loop.
  1096				p = nil
  1097				endp = nil
  1098			} else {
  1099				// Ptrmask is larger. Read it multiple times.
  1100				n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
  1101				endp = addb(ptrmask, n)
  1102				endnb = typ.size/sys.PtrSize - n*8
  1103			}
  1104		}
  1105		if p != nil {
  1106			b = uintptr(*p)
  1107			p = add1(p)
  1108			nb = 8
  1109		}
  1110	
  1111		if typ.size == dataSize {
  1112			// Single entry: can stop once we reach the non-pointer data.
  1113			nw = typ.ptrdata / sys.PtrSize
  1114		} else {
  1115			// Repeated instances of typ in an array.
  1116			// Have to process first N-1 entries in full, but can stop
  1117			// once we reach the non-pointer data in the final entry.
  1118			nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
  1119		}
  1120		if nw == 0 {
  1121			// No pointers! Caller was supposed to check.
  1122			println("runtime: invalid type ", typ.string())
  1123			throw("heapBitsSetType: called with non-pointer type")
  1124			return
  1125		}
  1126		if nw < 2 {
  1127			// Must write at least 2 words, because the "no scan"
  1128			// encoding doesn't take effect until the third word.
  1129			nw = 2
  1130		}
  1131	
   1132	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
  1133		// The leading byte is special because it contains the bits for word 1,
  1134		// which does not have the scan bit set.
   1135	// The leading half-byte is special because it's only half a byte,
  1136		// so we have to be careful with the bits already there.
  1137		switch {
  1138		default:
  1139			throw("heapBitsSetType: unexpected shift")
  1140	
  1141		case h.shift == 0:
  1142			// Ptrmask and heap bitmap are aligned.
  1143			// Handle first byte of bitmap specially.
  1144			//
  1145			// The first byte we write out covers the first four
  1146			// words of the object. The scan/dead bit on the first
  1147			// word must be set to scan since there are pointers
  1148			// somewhere in the object. The scan/dead bit on the
  1149			// second word is the checkmark, so we don't set it.
  1150			// In all following words, we set the scan/dead
   1151	// appropriately to indicate that the object continues
  1152			// to the next 2-bit entry in the bitmap.
  1153			//
  1154			// TODO: It doesn't matter if we set the checkmark, so
  1155			// maybe this case isn't needed any more.
  1156			hb = b & bitPointerAll
  1157			hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
  1158			if w += 4; w >= nw {
  1159				goto Phase3
  1160			}
  1161			*hbitp = uint8(hb)
  1162			hbitp = subtract1(hbitp)
  1163			b >>= 4
  1164			nb -= 4
  1165	
  1166		case sys.PtrSize == 8 && h.shift == 2:
  1167			// Ptrmask and heap bitmap are misaligned.
  1168			// The bits for the first two words are in a byte shared
  1169			// with another object, so we must be careful with the bits
  1170			// already there.
  1171			// We took care of 1-word and 2-word objects above,
  1172			// so this is at least a 6-word object.
  1173			hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
  1174			// This is not noscan, so set the scan bit in the
  1175			// first word.
  1176			hb |= bitScan << (2 * heapBitsShift)
  1177			b >>= 2
  1178			nb -= 2
  1179			// Note: no bitScan for second word because that's
  1180			// the checkmark.
  1181			*hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
  1182			*hbitp |= uint8(hb)
  1183			hbitp = subtract1(hbitp)
  1184			if w += 2; w >= nw {
  1185				// We know that there is more data, because we handled 2-word objects above.
  1186				// This must be at least a 6-word object. If we're out of pointer words,
  1187				// mark no scan in next bitmap byte and finish.
  1188				hb = 0
  1189				w += 4
  1190				goto Phase3
  1191			}
  1192		}
  1193	
  1194		// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
  1195		// The loop computes the bits for that last write but does not execute the write;
  1196		// it leaves the bits in hb for processing by phase 3.
  1197		// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
  1198		// use in the first half of the loop right now, and then we only adjust nb explicitly
  1199		// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
  1200		nb -= 4
  1201		for {
  1202			// Emit bitmap byte.
  1203			// b has at least nb+4 bits, with one exception:
  1204			// if w+4 >= nw, then b has only nw-w bits,
  1205			// but we'll stop at the break and then truncate
  1206			// appropriately in Phase 3.
  1207			hb = b & bitPointerAll
  1208			hb |= bitScanAll
  1209			if w += 4; w >= nw {
  1210				break
  1211			}
  1212			*hbitp = uint8(hb)
  1213			hbitp = subtract1(hbitp)
  1214			b >>= 4
  1215	
  1216			// Load more bits. b has nb right now.
  1217			if p != endp {
  1218				// Fast path: keep reading from ptrmask.
  1219				// nb unmodified: we just loaded 8 bits,
  1220				// and the next iteration will consume 8 bits,
  1221				// leaving us with the same nb the next time we're here.
  1222				if nb < 8 {
  1223					b |= uintptr(*p) << nb
  1224					p = add1(p)
  1225				} else {
  1226					// Reduce the number of bits in b.
  1227					// This is important if we skipped
  1228					// over a scalar tail, since nb could
  1229					// be larger than the bit width of b.
  1230					nb -= 8
  1231				}
  1232			} else if p == nil {
  1233				// Almost as fast path: track bit count and refill from pbits.
  1234				// For short repetitions.
  1235				if nb < 8 {
  1236					b |= pbits << nb
  1237					nb += endnb
  1238				}
  1239				nb -= 8 // for next iteration
  1240			} else {
  1241				// Slow path: reached end of ptrmask.
  1242				// Process final partial byte and rewind to start.
  1243				b |= uintptr(*p) << nb
  1244				nb += endnb
  1245				if nb < 8 {
  1246					b |= uintptr(*ptrmask) << nb
  1247					p = add1(ptrmask)
  1248				} else {
  1249					nb -= 8
  1250					p = ptrmask
  1251				}
  1252			}
  1253	
  1254			// Emit bitmap byte.
  1255			hb = b & bitPointerAll
  1256			hb |= bitScanAll
  1257			if w += 4; w >= nw {
  1258				break
  1259			}
  1260			*hbitp = uint8(hb)
  1261			hbitp = subtract1(hbitp)
  1262			b >>= 4
  1263		}
  1264	
  1265	Phase3:
  1266		// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
  1267		if w > nw {
  1268			// Counting the 4 entries in hb not yet written to memory,
  1269			// there are more entries than possible pointer slots.
  1270			// Discard the excess entries (can't be more than 3).
  1271			mask := uintptr(1)<<(4-(w-nw)) - 1
  1272			hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits
  1273		}
  1274	
  1275		// Change nw from counting possibly-pointer words to total words in allocation.
  1276		nw = size / sys.PtrSize
  1277	
  1278		// Write whole bitmap bytes.
  1279		// The first is hb, the rest are zero.
  1280		if w <= nw {
  1281			*hbitp = uint8(hb)
  1282			hbitp = subtract1(hbitp)
  1283			hb = 0 // for possible final half-byte below
  1284			for w += 4; w <= nw; w += 4 {
  1285				*hbitp = 0
  1286				hbitp = subtract1(hbitp)
  1287			}
  1288		}
  1289	
  1290		// Write final partial bitmap byte if any.
  1291		// We know w > nw, or else we'd still be in the loop above.
  1292		// It can be bigger only due to the 4 entries in hb that it counts.
  1293		// If w == nw+4 then there's nothing left to do: we wrote all nw entries
  1294		// and can discard the 4 sitting in hb.
   1295	// But if w == nw+2, we need to write the first two entries in hb.
  1296		// The byte is shared with the next object, so be careful with
  1297		// existing bits.
  1298		if w == nw+2 {
  1299			*hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb)
  1300		}
  1301	
  1302	Phase4:
  1303		// Phase 4: all done, but perhaps double check.
  1304		if doubleCheck {
  1305			end := heapBitsForAddr(x + size)
  1306			if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
  1307				println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
  1308				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
  1309				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
  1310				h0 := heapBitsForAddr(x)
  1311				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
  1312				print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
  1313				throw("bad heapBitsSetType")
  1314			}
  1315	
  1316			// Double-check that bits to be written were written correctly.
  1317			// Does not check that other bits were not written, unfortunately.
  1318			h := heapBitsForAddr(x)
  1319			nptr := typ.ptrdata / sys.PtrSize
  1320			ndata := typ.size / sys.PtrSize
  1321			count := dataSize / typ.size
  1322			totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
  1323			for i := uintptr(0); i < size/sys.PtrSize; i++ {
  1324				j := i % ndata
  1325				var have, want uint8
  1326				have = (*h.bitp >> h.shift) & (bitPointer | bitScan)
  1327				if i >= totalptr {
  1328					want = 0 // dead encoding
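      				// A GC program writes whole bitmap bytes, so its
      				// scan bits can run past totalptr, up to the next
      				// multiple of 4 entries.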
  1329					if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
  1330						want = bitScan
  1331					}
  1332				} else {
  1333					if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
  1334						want |= bitPointer
  1335					}
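      				// Word 1's high bit is the GC checkmark bit (see
      				// the comment at the top of this file), not a scan
      				// bit, so don't expect or compare it.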
  1336					if i != 1 {
  1337						want |= bitScan
  1338					} else {
  1339						have &^= bitScan
  1340					}
  1341				}
  1342				if have != want {
  1343					println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
  1344					print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
  1345					print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
  1346					print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
  1347					h0 := heapBitsForAddr(x)
  1348					print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
  1349					print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
  1350					print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
  1351					println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
  1352					if typ.kind&kindGCProg != 0 {
  1353						println("GC program:")
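      					// typ.gcdata begins with a 4-byte length
      					// prefix; the program proper follows it.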
  1354						dumpGCProg(addb(typ.gcdata, 4))
  1355					}
  1356					throw("bad heapBitsSetType")
  1357				}
  1358				h = h.next()
  1359			}
  1360			if ptrmask == debugPtrmask.data {
  1361				unlock(&debugPtrmask.lock)
  1362			}
  1363		}
  1364	}
  1365	
  1366	// heapBitsSetTypeNoScan marks x as noscan by setting the first word
  1367	// of x in the heap bitmap to scalar/dead.
  1368	func heapBitsSetTypeNoScan(x uintptr) {
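      	// The 00 entry in the first word is the dead encoding: it tells
      	// the garbage collector that the object contains no pointers at
      	// all, so none of it needs to be scanned.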
  1369		h := heapBitsForAddr(x)
  1370		*h.bitp &^= (bitPointer | bitScan) << h.shift
  1371	}
  1372	
  1373	var debugPtrmask struct {
  1374		lock mutex
  1375		data *byte
  1376	}
  1377	
  1378	// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
  1379	// progSize is the size of the memory described by the program.
  1380	// elemSize is the size of the element that the GC program describes a prefix of.
  1381	// dataSize is the total size of the intended data, a multiple of elemSize.
  1382	// allocSize is the total size of the allocated memory.
  1383	//
  1384	// GC programs are only used for large allocations.
  1385	// heapBitsSetType requires that allocSize is a multiple of 4 words,
  1386	// so that the relevant bitmap bytes are not shared with surrounding
  1387	// objects.
  1388	func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
  1389		if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
  1390			// Alignment will be wrong.
  1391			throw("heapBitsSetTypeGCProg: small allocation")
  1392		}
  1393		var totalBits uintptr
  1394		if elemSize == dataSize {
  1395			totalBits = runGCProg(prog, nil, h.bitp, 2)
  1396			if totalBits*sys.PtrSize != progSize {
  1397				println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
  1398				throw("heapBitsSetTypeGCProg: unexpected bit count")
  1399			}
  1400		} else {
  1401			count := dataSize / elemSize
  1402	
  1403		// Piece together program trailer to run after prog that does:
  1404		//	literal(0)
  1405		//	repeat(1, elemSize/ptrSize - progSize/ptrSize - 1) // zeros to fill element size
  1406		//	repeat(elemSize/ptrSize, count-1) // repeat that element for count
  1407		// This zero-pads the data remaining in the first element and then
  1408		// repeats that first element to fill the array.
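      		// For example (hypothetical sizes, for illustration only):
      		// with elemSize = 48, progSize = 16, count = 4 on a 64-bit
      		// system, n = 48/8 - 16/8 = 4, and the trailer assembled
      		// below is
      		//	01 00       literal(0)
      		//	81 03       repeat(1, 3)
      		//	80 06 03    repeat(6, 3)
      		//	00          stop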
  1409			var trailer [40]byte // 3 varints (max 10 each) + some bytes
  1410			i := 0
  1411			if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
  1412				// literal(0)
  1413				trailer[i] = 0x01
  1414				i++
  1415				trailer[i] = 0
  1416				i++
  1417				if n > 1 {
  1418					// repeat(1, n-1)
  1419					trailer[i] = 0x81
  1420					i++
  1421					n--
  1422					for ; n >= 0x80; n >>= 7 {
  1423						trailer[i] = byte(n | 0x80)
  1424						i++
  1425					}
  1426					trailer[i] = byte(n)
  1427					i++
  1428				}
  1429			}
  1430			// repeat(elemSize/ptrSize, count-1)
  1431			trailer[i] = 0x80
  1432			i++
  1433			n := elemSize / sys.PtrSize
  1434			for ; n >= 0x80; n >>= 7 {
  1435				trailer[i] = byte(n | 0x80)
  1436				i++
  1437			}
  1438			trailer[i] = byte(n)
  1439			i++
  1440			n = count - 1
  1441			for ; n >= 0x80; n >>= 7 {
  1442				trailer[i] = byte(n | 0x80)
  1443				i++
  1444			}
  1445			trailer[i] = byte(n)
  1446			i++
  1447			trailer[i] = 0
  1448			i++
  1449	
  1450			runGCProg(prog, &trailer[0], h.bitp, 2)
  1451	
  1452			// Even though we filled in the full array just now,
  1453			// record that we only filled in up to the ptrdata of the
  1454			// last element. This will cause the code below to
  1455			// memclr the dead section of the final array element,
  1456			// so that scanobject can stop early in the final element.
  1457			totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
  1458		}
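      	// Zero the bitmap between the end of the program's output and the
      	// end of the allocation. The zeroed entries are the dead encoding,
      	// which lets scanobject stop scanning early.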
  1459		endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
  1460		endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
  1461		memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
  1462	}
  1463	
  1464	// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
  1465	// size is the size of the region described by prog, in bytes.
  1466	// The resulting bitvector will have no more than size/sys.PtrSize bits.
  1467	func progToPointerMask(prog *byte, size uintptr) bitvector {
  1468		n := (size/sys.PtrSize + 7) / 8
  1469		x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
  1470		x[len(x)-1] = 0xa1 // overflow check sentinel
  1471		n = runGCProg(prog, nil, &x[0], 1)
  1472		if x[len(x)-1] != 0xa1 {
  1473			throw("progToPointerMask: overflow")
  1474		}
  1475		return bitvector{int32(n), &x[0]}
  1476	}
  1477	
  1478	// Packed GC pointer bitmaps, aka GC programs.
  1479	//
  1480	// For large types containing arrays, the type information has a
  1481	// natural repetition that can be encoded to save space in the
  1482	// binary and in the memory representation of the type information.
  1483	//
  1484	// The encoding is a simple Lempel-Ziv style bytecode machine
  1485	// with the following instructions:
  1486	//
  1487	//	00000000: stop
  1488	//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
  1489	//	10000000 n c: repeat the previous n bits c times; n, c are varints
  1490	//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
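      //
      // For example (a sketch, not a program that appears in the runtime):
      // the byte sequence 02 01 80 02 03 00 emits the two bits 1,0 (one
      // pointer word, then one scalar word) and repeats those two bits
      // three more times, describing four pointer/scalar word pairs in all.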
  1491	
  1492	// runGCProg executes the GC program prog, and then trailer if non-nil,
  1493	// writing to dst with entries of the given size.
  1494	// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
  1495	// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
  1496	// starting at dst (because the heap bitmap itself is laid out backward in memory).
  1497	// In this case, the caller guarantees that only whole bytes in dst need to be written.
  1498	//
  1499	// runGCProg returns the number of 1- or 2-bit entries written to memory.
  1500	func runGCProg(prog, trailer, dst *byte, size int) uintptr {
  1501		dstStart := dst
  1502	
  1503		// Bits waiting to be written to memory.
  1504		var bits uintptr
  1505		var nbits uintptr
  1506	
  1507		p := prog
  1508	Run:
  1509		for {
  1510			// Flush accumulated full bytes.
  1511			// The rest of the loop assumes that nbits <= 7.
  1512			for ; nbits >= 8; nbits -= 8 {
  1513				if size == 1 {
  1514					*dst = uint8(bits)
  1515					dst = add1(dst)
  1516					bits >>= 8
  1517				} else {
  1518					v := bits&bitPointerAll | bitScanAll
  1519					*dst = uint8(v)
  1520					dst = subtract1(dst)
  1521					bits >>= 4
  1522					v = bits&bitPointerAll | bitScanAll
  1523					*dst = uint8(v)
  1524					dst = subtract1(dst)
  1525					bits >>= 4
  1526				}
  1527			}
  1528	
  1529			// Process one instruction.
  1530			inst := uintptr(*p)
  1531			p = add1(p)
  1532			n := inst & 0x7F
  1533			if inst&0x80 == 0 {
  1534				// Literal bits; n == 0 means end of program.
  1535				if n == 0 {
  1536					// Program is over; continue in trailer if present.
  1537					if trailer != nil {
  1538						//println("trailer")
  1539						p = trailer
  1540						trailer = nil
  1541						continue
  1542					}
  1543					//println("done")
  1544					break Run
  1545				}
  1546				//println("lit", n, dst)
  1547				nbyte := n / 8
  1548				for i := uintptr(0); i < nbyte; i++ {
  1549					bits |= uintptr(*p) << nbits
  1550					p = add1(p)
  1551					if size == 1 {
  1552						*dst = uint8(bits)
  1553						dst = add1(dst)
  1554						bits >>= 8
  1555					} else {
  1556						v := bits&0xf | bitScanAll
  1557						*dst = uint8(v)
  1558						dst = subtract1(dst)
  1559						bits >>= 4
  1560						v = bits&0xf | bitScanAll
  1561						*dst = uint8(v)
  1562						dst = subtract1(dst)
  1563						bits >>= 4
  1564					}
  1565				}
  1566				if n %= 8; n > 0 {
  1567					bits |= uintptr(*p) << nbits
  1568					p = add1(p)
  1569					nbits += n
  1570				}
  1571				continue Run
  1572			}
  1573	
  1574			// Repeat. If n == 0, it is encoded in a varint in the next bytes.
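      		// (Varints are little-endian base-128: each byte supplies 7
      		// low bits, and a set high bit means another byte follows.)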
  1575			if n == 0 {
  1576				for off := uint(0); ; off += 7 {
  1577					x := uintptr(*p)
  1578					p = add1(p)
  1579					n |= (x & 0x7F) << off
  1580					if x&0x80 == 0 {
  1581						break
  1582					}
  1583				}
  1584			}
  1585	
  1586			// Count is encoded in a varint in the next bytes.
  1587			c := uintptr(0)
  1588			for off := uint(0); ; off += 7 {
  1589				x := uintptr(*p)
  1590				p = add1(p)
  1591				c |= (x & 0x7F) << off
  1592				if x&0x80 == 0 {
  1593					break
  1594				}
  1595			}
  1596			c *= n // now total number of bits to copy
  1597	
  1598			// If the number of bits being repeated is small, load them
  1599			// into a register and use that register for the entire loop
  1600			// instead of repeatedly reading from memory.
  1601			// Handling fewer than 8 bits here makes the general loop simpler.
  1602			// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
  1603			// the pattern to a bit buffer holding at most 7 bits (a partial byte)
  1604			// it will not overflow.
  1605			src := dst
  1606			const maxBits = sys.PtrSize*8 - 7
  1607			if n <= maxBits {
  1608				// Start with bits in output buffer.
  1609				pattern := bits
  1610				npattern := nbits
  1611	
  1612				// If we need more bits, fetch them from memory.
  1613				if size == 1 {
  1614					src = subtract1(src)
  1615					for npattern < n {
  1616						pattern <<= 8
  1617						pattern |= uintptr(*src)
  1618						src = subtract1(src)
  1619						npattern += 8
  1620					}
  1621				} else {
  1622					src = add1(src)
  1623					for npattern < n {
  1624						pattern <<= 4
  1625						pattern |= uintptr(*src) & 0xf
  1626						src = add1(src)
  1627						npattern += 4
  1628					}
  1629				}
  1630	
  1631				// We started with the whole bit output buffer,
  1632				// and then we loaded bits from whole bytes.
  1633				// Either way, we might now have too many instead of too few.
  1634				// Discard the extra.
  1635				if npattern > n {
  1636					pattern >>= npattern - n
  1637					npattern = n
  1638				}
  1639	
  1640				// Replicate pattern to at most maxBits.
  1641				if npattern == 1 {
  1642					// One bit being repeated.
  1643					// If the bit is 1, make the pattern all 1s.
  1644					// If the bit is 0, the pattern is already all 0s,
  1645					// but we can claim that the number of bits
  1646					// in the word is equal to the number we need (c),
  1647					// because right shift of bits will zero fill.
  1648					if pattern == 1 {
  1649						pattern = 1<<maxBits - 1
  1650						npattern = maxBits
  1651					} else {
  1652						npattern = c
  1653					}
  1654				} else {
  1655					b := pattern
  1656					nb := npattern
  1657					if nb+nb <= maxBits {
  1658						// Double pattern until the whole uintptr is filled.
  1659						for nb <= sys.PtrSize*8 {
  1660							b |= b << nb
  1661							nb += nb
  1662						}
  1663						// Trim away incomplete copy of original pattern in high bits.
  1664						// TODO(rsc): Replace with table lookup or loop on systems without divide?
  1665						nb = maxBits / npattern * npattern
  1666						b &= 1<<nb - 1
  1667						pattern = b
  1668						npattern = nb
  1669					}
  1670				}
  1671	
  1672				// Add pattern to bit buffer and flush bit buffer, c/npattern times.
  1673				// Since pattern contains >8 bits, there will be full bytes to flush
  1674				// on each iteration.
  1675				for ; c >= npattern; c -= npattern {
  1676					bits |= pattern << nbits
  1677					nbits += npattern
  1678					if size == 1 {
  1679						for nbits >= 8 {
  1680							*dst = uint8(bits)
  1681							dst = add1(dst)
  1682							bits >>= 8
  1683							nbits -= 8
  1684						}
  1685					} else {
  1686						for nbits >= 4 {
  1687							*dst = uint8(bits&0xf | bitScanAll)
  1688							dst = subtract1(dst)
  1689							bits >>= 4
  1690							nbits -= 4
  1691						}
  1692					}
  1693				}
  1694	
  1695				// Add final fragment to bit buffer.
  1696				if c > 0 {
  1697					pattern &= 1<<c - 1
  1698					bits |= pattern << nbits
  1699					nbits += c
  1700				}
  1701				continue Run
  1702			}
  1703	
  1704			// Repeat; n too large to fit in a register.
  1705			// Since nbits <= 7, we know the first few bytes of repeated data
  1706			// are already written to memory.
  1707			off := n - nbits // n > nbits because n > maxBits and nbits <= 7
  1708			if size == 1 {
  1709				// Leading src fragment.
  1710				src = subtractb(src, (off+7)/8)
  1711				if frag := off & 7; frag != 0 {
  1712					bits |= uintptr(*src) >> (8 - frag) << nbits
  1713					src = add1(src)
  1714					nbits += frag
  1715					c -= frag
  1716				}
  1717				// Main loop: load one byte, write another.
  1718				// The bits are rotating through the bit buffer.
  1719				for i := c / 8; i > 0; i-- {
  1720					bits |= uintptr(*src) << nbits
  1721					src = add1(src)
  1722					*dst = uint8(bits)
  1723					dst = add1(dst)
  1724					bits >>= 8
  1725				}
  1726				// Final src fragment.
  1727				if c %= 8; c > 0 {
  1728					bits |= (uintptr(*src) & (1<<c - 1)) << nbits
  1729					nbits += c
  1730				}
  1731			} else {
  1732				// Leading src fragment.
  1733				src = addb(src, (off+3)/4)
  1734				if frag := off & 3; frag != 0 {
  1735					bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
  1736					src = subtract1(src)
  1737					nbits += frag
  1738					c -= frag
  1739				}
  1740				// Main loop: load one byte, write another.
  1741				// The bits are rotating through the bit buffer.
  1742				for i := c / 4; i > 0; i-- {
  1743					bits |= (uintptr(*src) & 0xf) << nbits
  1744					src = subtract1(src)
  1745					*dst = uint8(bits&0xf | bitScanAll)
  1746					dst = subtract1(dst)
  1747					bits >>= 4
  1748				}
  1749				// Final src fragment.
  1750				if c %= 4; c > 0 {
  1751					bits |= (uintptr(*src) & (1<<c - 1)) << nbits
  1752					nbits += c
  1753				}
  1754			}
  1755		}
  1756	
  1757		// Write any final bits out, using full-byte writes, even for the final byte.
  1758		var totalBits uintptr
  1759		if size == 1 {
  1760			totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
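      		// Round nbits up to a whole number of bytes (-nbits&7 is the
      		// distance to the next multiple of 8) so the loop below
      		// flushes full bytes.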
  1761			nbits += -nbits & 7
  1762			for ; nbits > 0; nbits -= 8 {
  1763				*dst = uint8(bits)
  1764				dst = add1(dst)
  1765				bits >>= 8
  1766			}
  1767		} else {
  1768			totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
  1769			nbits += -nbits & 3
  1770			for ; nbits > 0; nbits -= 4 {
  1771				v := bits&0xf | bitScanAll
  1772				*dst = uint8(v)
  1773				dst = subtract1(dst)
  1774				bits >>= 4
  1775			}
  1776		}
  1777		return totalBits
  1778	}
  1779	
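      // dumpGCProg prints a human-readable disassembly of the GC program p,
      // for debugging.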
  1780	func dumpGCProg(p *byte) {
  1781		nptr := 0
  1782		for {
  1783			x := *p
  1784			p = add1(p)
  1785			if x == 0 {
  1786				print("\t", nptr, " end\n")
  1787				break
  1788			}
  1789			if x&0x80 == 0 {
  1790				print("\t", nptr, " lit ", x, ":")
  1791				n := int(x+7) / 8
  1792				for i := 0; i < n; i++ {
  1793					print(" ", hex(*p))
  1794					p = add1(p)
  1795				}
  1796				print("\n")
  1797				nptr += int(x)
  1798			} else {
  1799				nbit := int(x &^ 0x80)
  1800				if nbit == 0 {
  1801					for nb := uint(0); ; nb += 7 {
  1802						x := *p
  1803						p = add1(p)
  1804						nbit |= int(x&0x7f) << nb
  1805						if x&0x80 == 0 {
  1806							break
  1807						}
  1808					}
  1809				}
  1810				count := 0
  1811				for nb := uint(0); ; nb += 7 {
  1812					x := *p
  1813					p = add1(p)
  1814					count |= int(x&0x7f) << nb
  1815					if x&0x80 == 0 {
  1816						break
  1817					}
  1818				}
  1819				print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
  1820				nptr += nbit * count
  1821			}
  1822		}
  1823	}
  1824	
  1825	// Testing.
  1826	
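      // getgcmaskcb is a gentraceback callback used by getgcmask: it records
      // the frame that contains target.sp and returns false to stop the walk.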
  1827	func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
  1828		target := (*stkframe)(ctxt)
  1829		if frame.sp <= target.sp && target.sp < frame.varp {
  1830			*target = *frame
  1831			return false
  1832		}
  1833		return true
  1834	}
  1835	
  1836	// gcbits returns the GC type info for x, for testing.
  1837	// The result is the bitmap entries (0 or 1), one entry per byte.
  1838	//go:linkname reflect_gcbits reflect.gcbits
  1839	func reflect_gcbits(x interface{}) []byte {
  1840		ret := getgcmask(x)
  1841		typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
  1842		nptr := typ.ptrdata / sys.PtrSize
  1843		for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
  1844			ret = ret[:len(ret)-1]
  1845		}
  1846		return ret
  1847	}
  1848	
  1849	// getgcmask returns the GC type info for the object stored in ep, for testing.
  1850	func getgcmask(ep interface{}) (mask []byte) {
  1851		e := *efaceOf(&ep)
  1852		p := e.data
  1853		t := e._type
  1854		// data or bss
  1855		for _, datap := range activeModules() {
  1856			// data
  1857			if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
  1858				bitmap := datap.gcdatamask.bytedata
  1859				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
  1860				mask = make([]byte, n/sys.PtrSize)
  1861				for i := uintptr(0); i < n; i += sys.PtrSize {
  1862					off := (uintptr(p) + i - datap.data) / sys.PtrSize
  1863					mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
  1864				}
  1865				return
  1866			}
  1867	
  1868			// bss
  1869			if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
  1870				bitmap := datap.gcbssmask.bytedata
  1871				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
  1872				mask = make([]byte, n/sys.PtrSize)
  1873				for i := uintptr(0); i < n; i += sys.PtrSize {
  1874					off := (uintptr(p) + i - datap.bss) / sys.PtrSize
  1875					mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
  1876				}
  1877				return
  1878			}
  1879		}
  1880	
  1881		// heap
  1882		var n uintptr
  1883		var base uintptr
  1884		if mlookup(uintptr(p), &base, &n, nil) != 0 {
  1885			mask = make([]byte, n/sys.PtrSize)
  1886			for i := uintptr(0); i < n; i += sys.PtrSize {
  1887				hbits := heapBitsForAddr(base + i)
  1888				if hbits.isPointer() {
  1889					mask[i/sys.PtrSize] = 1
  1890				}
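      			// Skip the termination check at word 1: its high bit
      			// is the checkmark bit, not a morePointers bit.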
  1891				if i != 1*sys.PtrSize && !hbits.morePointers() {
  1892					mask = mask[:i/sys.PtrSize]
  1893					break
  1894				}
  1895			}
  1896			return
  1897		}
  1898	
  1899		// stack
  1900		if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
  1901			var frame stkframe
  1902			frame.sp = uintptr(p)
  1904			gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
  1905			if frame.fn != nil {
  1906				f := frame.fn
  1907				targetpc := frame.continpc
  1908				if targetpc == 0 {
  1909					return
  1910				}
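      			// Back up one byte so targetpc falls inside the CALL
      			// instruction, selecting the stack map in effect at
      			// the call.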
  1911				if targetpc != f.entry {
  1912					targetpc--
  1913				}
  1914				pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
  1915				if pcdata == -1 {
  1916					return
  1917				}
  1918				stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
  1919				if stkmap == nil || stkmap.n <= 0 {
  1920					return
  1921				}
  1922				bv := stackmapdata(stkmap, pcdata)
  1923				size := uintptr(bv.n) * sys.PtrSize
  1924				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
  1925				mask = make([]byte, n/sys.PtrSize)
  1926				for i := uintptr(0); i < n; i += sys.PtrSize {
  1927					bitmap := bv.bytedata
  1928					off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
  1929					mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
  1930				}
  1931			}
  1932			return
  1933		}
  1934	
  1935		// Otherwise, p points at something the GC doesn't know about,
  1936		// possibly read-only data like the result of malloc(0).
  1937		// Such memory must not contain pointers.
  1938		return
  1939	}
  1940	
