...

Source file src/runtime/mbitmap.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Garbage collector: type and heap bitmaps.
     6	//
     7	// Stack, data, and bss bitmaps
     8	//
     9	// Stack frames and global variables in the data and bss sections are described
    10	// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
    11	// to be visited during GC. The bits in each byte are consumed starting with
    12	// the low bit: 1<<0, 1<<1, and so on.
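//
// For example (an editorial sketch, not part of the original comment), a
// 1-bit bitmap byte b is consumed low bit first:
//
//	for i := uint(0); i < 8; i++ {
//		if b&(1<<i) != 0 {
//			// word i holds a live pointer to be visited
//		}
//	}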
    13	//
    14	// Heap bitmap
    15	//
    16	// The allocated heap comes from a subset of the memory in the range [start, used),
    17	// where start == mheap_.arena_start and used == mheap_.arena_used.
    18	// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
    19	// stored in bytes indexed backward in memory from start.
    20	// That is, the byte at address start-1 holds the 2-bit entries for the four words
    21	// start through start+3*ptrSize, the byte at start-2 holds the entries for
    22	// start+4*ptrSize through start+7*ptrSize, and so on.
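//
// For example (editorial sketch; the names mirror heapBitsForAddr below),
// the entry for the word at address addr is found as:
//
//	off := (addr - start) / ptrSize   // word index into the arena
//	bitp := start - off/4 - 1         // bitmap byte holding the entry
//	shift := off & 3                  // which 2-bit entry within the byte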
    23	//
    24	// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
    25	// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
    26	// The meaning of the high bit depends on the position of the word being described
    27	// in its allocated object. In all words *except* the second word, the
    28	// high bit indicates that the object is still being described. In
    29	// these words, if a bit pair with a high bit 0 is encountered, the
    30	// low bit can also be assumed to be 0, and the object description is
    31	// over. This 00 is called the ``dead'' encoding: it signals that the
    32	// rest of the words in the object are uninteresting to the garbage
    33	// collector.
    34	//
    35	// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
    36	//
    37	// The 2-bit entries are split when written into the byte, so that the top half
    38	// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
    39	// bits.
    40	// This form allows a copy from the 1-bit to the 4-bit form to keep the
    41	// pointer bits contiguous, instead of having to space them out.
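//
// Pictorially (editorial illustration), one heap bitmap byte covers four
// words:
//
//	bit:  7 6 5 4   3 2 1 0
//	      h h h h   p p p p
//
// where bit 4+i is the high (scan/checkmark) bit and bit i is the pointer
// bit for word i of the four.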
    42	//
    43	// The code makes use of the fact that the zero value for a heap bitmap
    44	// has no live pointer bit set and is (depending on position) not used,
    45	// not checkmarked, and is the dead encoding.
    46	// These properties must be preserved when modifying the encoding.
    47	//
    48	// Checkmarks
    49	//
    50	// In a concurrent garbage collector, one worries about failing to mark
    51	// a live object due to mutations without write barriers or bugs in the
    52	// collector implementation. As a sanity check, the GC has a 'checkmark'
    53	// mode that retraverses the object graph with the world stopped, to make
    54	// sure that everything that should be marked is marked.
    55	// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
    56	// for the second word of the object holds the checkmark bit.
    57	// When not in checkmark mode, this bit is set to 1.
    58	//
    59	// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
    60	// means every allocated object has two words, so there is room for the
    61	// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
    62	// just one word, so the second bit pair is not available for encoding the
    63	// checkmark. However, because non-pointer allocations are combined
    64	// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
    65	// must be a pointer, so the type bit in the first word is not actually needed.
    66	// It is still used in general, except that in checkmark mode the type bit is
    67	// repurposed as the checkmark bit and then reinitialized (to 1) as the type
    68	// bit when checkmark mode finishes.
    69	//
    70	
    71	package runtime
    72	
    73	import (
    74		"runtime/internal/atomic"
    75		"runtime/internal/sys"
    76		"unsafe"
    77	)
    78	
    79	const (
    80		bitPointer = 1 << 0
    81		bitMarked  = 1 << 4 // TODO: Rename bitScan.
    82	
    83		heapBitsShift   = 1                     // shift offset between successive bitPointer or bitMarked entries
    84		heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
    85	
    86		// all mark/pointer bits in a byte
    87		bitMarkedAll  = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
    88		bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
    89	)
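
// Editorial note: with bitPointer == 1<<0 and bitMarked == 1<<4, the
// constants above expand to bitPointerAll == 0x0f and bitMarkedAll == 0xf0,
// i.e. the low nibble holds the four pointer bits and the high nibble the
// four mark/scan bits, matching the split-byte form described in the file
// comment.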
    90	
    91	// addb returns the byte pointer p+n.
    92	//go:nowritebarrier
    93	//go:nosplit
    94	func addb(p *byte, n uintptr) *byte {
    95		// Note: wrote out full expression instead of calling add(p, n)
    96		// to reduce the number of temporaries generated by the
    97		// compiler for this trivial expression during inlining.
    98		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
    99	}
   100	
   101	// subtractb returns the byte pointer p-n.
   102	// subtractb is typically used when traversing the pointer tables referred to
   103	// by hbits, which are arranged in reverse order.
   104	//go:nowritebarrier
   105	//go:nosplit
   106	func subtractb(p *byte, n uintptr) *byte {
   107		// Note: wrote out full expression instead of calling add(p, -n)
   108		// to reduce the number of temporaries generated by the
   109		// compiler for this trivial expression during inlining.
   110		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
   111	}
   112	
   113	// add1 returns the byte pointer p+1.
   114	//go:nowritebarrier
   115	//go:nosplit
   116	func add1(p *byte) *byte {
   117		// Note: wrote out full expression instead of calling addb(p, 1)
   118		// to reduce the number of temporaries generated by the
   119		// compiler for this trivial expression during inlining.
   120		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
   121	}
   122	
   123	// subtract1 returns the byte pointer p-1.
   124	// subtract1 is typically used when traversing the pointer tables referred to
   125	// by hbits, which are arranged in reverse order.
   126	//go:nowritebarrier
   127	//
   128	// nosplit because it is used during write barriers and must not be preempted.
   129	//go:nosplit
   130	func subtract1(p *byte) *byte {
   131		// Note: wrote out full expression instead of calling subtractb(p, 1)
   132		// to reduce the number of temporaries generated by the
   133		// compiler for this trivial expression during inlining.
   134		return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
   135	}
   136	
   137	// mHeap_MapBits is called each time arena_used is extended.
   138	// It maps any additional bitmap memory needed for the new arena memory.
   139	// It must be called with the expected new value of arena_used,
   140	// *before* h.arena_used has been updated.
   141	// Waiting to update arena_used until after the memory has been mapped
   142	// avoids faults when other threads try to access the bitmap immediately
   143	// after observing the change to arena_used.
   144	//
   145	//go:nowritebarrier
   146	func (h *mheap) mapBits(arena_used uintptr) {
   147		// Caller has added extra mappings to the arena.
   148		// Add extra mappings of bitmap words as needed.
   149		// We allocate extra bitmap pieces in chunks of bitmapChunk.
   150		const bitmapChunk = 8192
   151	
   152		n := (arena_used - mheap_.arena_start) / heapBitmapScale
   153		n = round(n, bitmapChunk)
   154		n = round(n, sys.PhysPageSize)
   155		if h.bitmap_mapped >= n {
   156			return
   157		}
   158	
   159		sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
   160		h.bitmap_mapped = n
   161	}
   162	
   163	// heapBits provides access to the bitmap bits for a single heap word.
   164	// The methods on heapBits take value receivers so that the compiler
   165	// can more easily inline calls to those methods and registerize the
   166	// struct fields independently.
   167	type heapBits struct {
   168		bitp  *uint8
   169		shift uint32
   170	}
   171	
   172	// markBits provides access to the mark bit for an object in the heap.
   173	// bytep points to the byte holding the mark bit.
   174	// mask is a byte with a single bit set that can be &ed with *bytep
   175	// to see if the bit has been set.
   176	// *m.bytep&m.mask != 0 indicates the mark bit is set.
   177	// index can be used along with span information to generate
   178	// the address of the object in the heap.
   179	// We maintain one set of mark bits for allocation and one for
   180	// marking purposes.
   181	type markBits struct {
   182		bytep *uint8
   183		mask  uint8
   184		index uintptr
   185	}
   186	
   187	//go:nosplit
   188	func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
   189		whichByte := allocBitIndex / 8
   190		whichBit := allocBitIndex % 8
   191		bytePtr := addb(s.allocBits, whichByte)
   192		return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
   193	}
   194	
   195	// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
   196	// and negates them so that ctz (count trailing zeros) instructions
   197	// can be used. It then places these 8 bytes into the cached 64 bit
   198	// s.allocCache.
   199	func (s *mspan) refillAllocCache(whichByte uintptr) {
   200		bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte)))
   201		aCache := uint64(0)
   202		aCache |= uint64(bytes[0])
   203		aCache |= uint64(bytes[1]) << (1 * 8)
   204		aCache |= uint64(bytes[2]) << (2 * 8)
   205		aCache |= uint64(bytes[3]) << (3 * 8)
   206		aCache |= uint64(bytes[4]) << (4 * 8)
   207		aCache |= uint64(bytes[5]) << (5 * 8)
   208		aCache |= uint64(bytes[6]) << (6 * 8)
   209		aCache |= uint64(bytes[7]) << (7 * 8)
   210		s.allocCache = ^aCache
   211	}
   212	
   213	// nextFreeIndex returns the index of the next free object in s at
   214	// or after s.freeindex.
   215	// There are hardware instructions that can be used to make this
   216	// faster if profiling warrants it.
   217	func (s *mspan) nextFreeIndex() uintptr {
   218		sfreeindex := s.freeindex
   219		snelems := s.nelems
   220		if sfreeindex == snelems {
   221			return sfreeindex
   222		}
   223		if sfreeindex > snelems {
   224			throw("s.freeindex > s.nelems")
   225		}
   226	
   227		aCache := s.allocCache
   228	
   229		bitIndex := sys.Ctz64(aCache)
   230		for bitIndex == 64 {
   231			// Move index to start of next cached bits.
   232			sfreeindex = (sfreeindex + 64) &^ (64 - 1)
   233			if sfreeindex >= snelems {
   234				s.freeindex = snelems
   235				return snelems
   236			}
   237			whichByte := sfreeindex / 8
   238			// Refill s.allocCache with the next 64 alloc bits.
   239			s.refillAllocCache(whichByte)
   240			aCache = s.allocCache
   241			bitIndex = sys.Ctz64(aCache)
   242			// nothing available in cached bits
   243			// grab the next 8 bytes and try again.
   244		}
   245		result := sfreeindex + uintptr(bitIndex)
   246		if result >= snelems {
   247			s.freeindex = snelems
   248			return snelems
   249		}
   250	
   251		s.allocCache >>= (bitIndex + 1)
   252		sfreeindex = result + 1
   253	
   254		if sfreeindex%64 == 0 && sfreeindex != snelems {
   255			// We just incremented s.freeindex so it isn't 0.
   256			// As each 1 in s.allocCache was encountered and used for allocation
   257			// it was shifted away. At this point s.allocCache contains all 0s.
   258			// Refill s.allocCache so that it corresponds
   259			// to the bits at s.allocBits starting at s.freeindex.
   260			whichByte := sfreeindex / 8
   261			s.refillAllocCache(whichByte)
   262		}
   263		s.freeindex = sfreeindex
   264		return result
   265	}
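
// exampleNextFree is an editorial sketch (a hypothetical helper, not part of
// the runtime) of the technique used by refillAllocCache and nextFreeIndex:
// invert the allocation bits so free slots become 1s, then count trailing
// zeros to find the first free slot.
func exampleNextFree(allocBits [8]uint8) uintptr {
	cache := uint64(0)
	for i, b := range allocBits {
		cache |= uint64(^b) << (uint(i) * 8) // inverted: 1 bits mark free slots
	}
	// sys.Ctz64 returns 64 when cache is 0, meaning no free slot here.
	return uintptr(sys.Ctz64(cache))
}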
   266	
   267	func (s *mspan) isFree(index uintptr) bool {
   268		whichByte := index / 8
   269		whichBit := index % 8
   270		byteVal := *addb(s.allocBits, whichByte)
   271		return byteVal&uint8(1<<whichBit) == 0
   272	}
   273	
   274	func (s *mspan) objIndex(p uintptr) uintptr {
   275		byteOffset := p - s.base()
   276		if byteOffset == 0 {
   277			return 0
   278		}
   279		if s.baseMask != 0 {
   280		// s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift
   281			return byteOffset >> s.divShift
   282		}
   283		return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
   284	}
   285	
   286	func markBitsForAddr(p uintptr) markBits {
   287		s := spanOf(p)
   288		objIndex := s.objIndex(p)
   289		return s.markBitsForIndex(objIndex)
   290	}
   291	
   292	func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
   293		whichByte := objIndex / 8
   294		bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
   295		bytePtr := addb(s.gcmarkBits, whichByte)
   296		return markBits{bytePtr, bitMask, objIndex}
   297	}
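
// Editorial usage sketch: to test whether the object with index objIndex in
// an in-use span s is marked, combine markBitsForIndex with isMarked:
//
//	if s.markBitsForIndex(objIndex).isMarked() {
//		// object is marked
//	}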
   298	
   299	func (s *mspan) markBitsForBase() markBits {
   300		return markBits{s.gcmarkBits, uint8(1), 0}
   301	}
   302	
   303	// isMarked reports whether mark bit m is set.
   304	func (m markBits) isMarked() bool {
   305		return *m.bytep&m.mask != 0
   306	}
   307	
   308	// setMarked sets the marked bit in the markbits, atomically. Some compilers
   309	// are not able to inline the atomic.Or8 function, so if it appears as a hot
   310	// spot consider inlining it manually.
   311	func (m markBits) setMarked() {
   312		// Might be racing with other updates, so use atomic update always.
   313		// We used to be clever here and use a non-atomic update in certain
   314		// cases, but it's not worth the risk.
   315		atomic.Or8(m.bytep, m.mask)
   316	}
   317	
   318	// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
   319	func (m markBits) setMarkedNonAtomic() {
   320		*m.bytep |= m.mask
   321	}
   322	
   323	// clearMarked clears the marked bit in the markbits, atomically.
   324	func (m markBits) clearMarked() {
   325		// Might be racing with other updates, so use atomic update always.
   326		// We used to be clever here and use a non-atomic update in certain
   327		// cases, but it's not worth the risk.
   328		atomic.And8(m.bytep, ^m.mask)
   329	}
   330	
   331	// clearMarkedNonAtomic clears the marked bit non-atomically. The bit must currently be set: the XOR below toggles it rather than unconditionally clearing it.
   332	func (m markBits) clearMarkedNonAtomic() {
   333		*m.bytep ^= m.mask
   334	}
   335	
   336	// markBitsForSpan returns the markBits for the span base address base.
   337	func markBitsForSpan(base uintptr) (mbits markBits) {
   338		if base < mheap_.arena_start || base >= mheap_.arena_used {
   339			throw("markBitsForSpan: base out of range")
   340		}
   341		mbits = markBitsForAddr(base)
   342		if mbits.mask != 1 {
   343			throw("markBitsForSpan: unaligned start")
   344		}
   345		return mbits
   346	}
   347	
   348	// advance advances the markBits to the next object in the span.
   349	func (m *markBits) advance() {
   350		if m.mask == 1<<7 {
   351			m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
   352			m.mask = 1
   353		} else {
   354			m.mask = m.mask << 1
   355		}
   356		m.index++
   357	}
   358	
   359	// heapBitsForAddr returns the heapBits for the address addr.
   360	// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
   361	//
   362	// nosplit because it is used during write barriers and must not be preempted.
   363	//go:nosplit
   364	func heapBitsForAddr(addr uintptr) heapBits {
   365		// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
   366		off := (addr - mheap_.arena_start) / sys.PtrSize
   367		return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)}
   368	}
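
// Editorial example: with sys.PtrSize == 8, the word at mheap_.arena_start+40
// has off == 5, so its entry is in the byte at mheap_.bitmap - 5/4 - 1 ==
// mheap_.bitmap - 2, at shift 1.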
   369	
   370	// heapBitsForSpan returns the heapBits for the span base address base.
   371	func heapBitsForSpan(base uintptr) (hbits heapBits) {
   372		if base < mheap_.arena_start || base >= mheap_.arena_used {
   373			throw("heapBitsForSpan: base out of range")
   374		}
   375		return heapBitsForAddr(base)
   376	}
   377	
   378	// heapBitsForObject returns the base address for the heap object
   379	// containing the address p, the heapBits for base,
   380	// the object's span, and the index of the object in s.
   381	// If p does not point into a heap object,
   382	// heapBitsForObject returns base == 0;
   383	// otherwise it returns the base of the object.
   384	//
   385	// refBase and refOff optionally give the base address of the object
   386	// in which the pointer p was found and the byte offset at which it
   387	// was found. These are used for error reporting.
   388	func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) {
   389		arenaStart := mheap_.arena_start
   390		if p < arenaStart || p >= mheap_.arena_used {
   391			return
   392		}
   393		off := p - arenaStart
   394		idx := off >> _PageShift
   395		// p points into the heap, but possibly to the middle of an object.
   396		// Consult the span table to find the block beginning.
   397		s = h_spans[idx]
   398		if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
   399			if s == nil || s.state == _MSpanStack {
   400				// If s is nil, the virtual address has never been part of the heap.
   401				// This pointer may be to some mmap'd region, so we allow it.
   402				// Pointers into stacks are also ok, the runtime manages these explicitly.
   403				return
   404			}
   405	
   406			// The following ensures that we are rigorous about what data
   407			// structures hold valid pointers.
   408			if debug.invalidptr != 0 {
   409				// Typically this indicates an incorrect use
   410				// of unsafe or cgo to store a bad pointer in
   411				// the Go heap. It may also indicate a runtime
   412				// bug.
   413				//
   414				// TODO(austin): We could be more aggressive
   415				// and detect pointers to unallocated objects
   416				// in allocated spans.
   417				printlock()
   418				print("runtime: pointer ", hex(p))
   419				if s.state != mSpanInUse {
   420					print(" to unallocated span")
   421				} else {
   422					print(" to unused region of span")
   423				}
   424				print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
   425				if refBase != 0 {
   426					print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
   427					gcDumpObject("object", refBase, refOff)
   428				}
   429				throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
   430			}
   431			return
   432		}
   433		// If this span holds object of a power of 2 size, just mask off the bits to
   434		// the interior of the object. Otherwise use the size to get the base.
   435		if s.baseMask != 0 {
   436			// optimize for power of 2 sized objects.
   437			base = s.base()
   438			base = base + (p-base)&s.baseMask
   439			objIndex = (base - s.base()) >> s.divShift
   440			// base = p & s.baseMask is faster for small spans,
   441			// but doesn't work for large spans.
   442			// Overall, it's faster to use the more general computation above.
   443		} else {
   444			base = s.base()
   445			if p-base >= s.elemsize {
   446				// n := (p - base) / s.elemsize, using division by multiplication
   447				objIndex = uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
   448				base += objIndex * s.elemsize
   449			}
   450		}
   451		// Now that we know the actual base, compute heapBits to return to caller.
   452		hbits = heapBitsForAddr(base)
   453		return
   454	}
   455	
   456	// prefetch the bits.
   457	func (h heapBits) prefetch() {
   458		prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
   459	}
   460	
   461	// next returns the heapBits describing the next pointer-sized word in memory.
   462	// That is, if h describes address p, h.next() describes p+ptrSize.
   463	// Note that next does not modify h. The caller must record the result.
   464	//
   465	// nosplit because it is used during write barriers and must not be preempted.
   466	//go:nosplit
   467	func (h heapBits) next() heapBits {
   468		if h.shift < 3*heapBitsShift {
   469			return heapBits{h.bitp, h.shift + heapBitsShift}
   470		}
   471		return heapBits{subtract1(h.bitp), 0}
   472	}
   473	
   474	// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
   475	// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
   476	// h.forward(1) is equivalent to h.next(), just slower.
   477	// Note that forward does not modify h. The caller must record the result.
   479	func (h heapBits) forward(n uintptr) heapBits {
   480		n += uintptr(h.shift) / heapBitsShift
   481		return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
   482	}
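
// Editorial example: for h with h.shift == 3*heapBitsShift, h.forward(2)
// computes n == 5, yielding the byte at subtractb(h.bitp, 1) with shift
// heapBitsShift, the same position reached by applying h.next() twice.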
   483	
   484	// bits returns the heap bits for the current word.
   485	// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
   486	// The result includes in its higher bits the bits for subsequent words described by the same bitmap byte.
   487	func (h heapBits) bits() uint32 {
   488		// The (shift & 31) eliminates a test and conditional branch
   489		// from the generated code.
   490		return uint32(*h.bitp) >> (h.shift & 31)
   491	}
   492	
   493	// morePointers returns false if this word and all remaining words in this
   494	// object are scalars (the ``dead'' encoding); otherwise it returns true.
   495	// h must not describe the second word of the object.
   496	func (h heapBits) morePointers() bool {
   497		return h.bits()&bitMarked != 0
   498	}
   499	
   500	// isPointer reports whether the heap bits describe a pointer word.
   501	//
   502	// nosplit because it is used during write barriers and must not be preempted.
   503	//go:nosplit
   504	func (h heapBits) isPointer() bool {
   505		return h.bits()&bitPointer != 0
   506	}
   507	
   508	// hasPointers reports whether the given object has any pointers.
   509	// It must be told how large the object at h is for efficiency.
   510	// h must describe the initial word of the object.
   511	func (h heapBits) hasPointers(size uintptr) bool {
   512		if size == sys.PtrSize { // 1-word objects are always pointers
   513			return true
   514		}
   515		return (*h.bitp>>h.shift)&bitMarked != 0
   516	}
   517	
   518	// isCheckmarked reports whether the heap bits have the checkmarked bit set.
   519	// It must be told how large the object at h is, because the encoding of the
   520	// checkmark bit varies by size.
   521	// h must describe the initial word of the object.
   522	func (h heapBits) isCheckmarked(size uintptr) bool {
   523		if size == sys.PtrSize {
   524			return (*h.bitp>>h.shift)&bitPointer != 0
   525		}
   526		// All multiword objects are 2-word aligned,
   527		// so we know that the initial word's 2-bit pair
   528		// and the second word's 2-bit pair are in the
   529		// same heap bitmap byte, *h.bitp.
   530		return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
   531	}
   532	
   533	// setCheckmarked sets the checkmarked bit.
   534	// It must be told how large the object at h is, because the encoding of the
   535	// checkmark bit varies by size.
   536	// h must describe the initial word of the object.
   537	func (h heapBits) setCheckmarked(size uintptr) {
   538		if size == sys.PtrSize {
   539			atomic.Or8(h.bitp, bitPointer<<h.shift)
   540			return
   541		}
   542		atomic.Or8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
   543	}
   544	
   545	// heapBitsBulkBarrier executes writebarrierptr_nostore
   546	// for every pointer slot in the memory range [p, p+size),
   547	// using the heap, data, or BSS bitmap to locate those pointer slots.
   548	// This executes the write barriers necessary after a memmove.
   549	// Both p and size must be pointer-aligned.
   550	// The range [p, p+size) must lie within a single object.
   551	//
   552	// Callers should call heapBitsBulkBarrier immediately after
   553	// calling memmove(p, src, size). This function is marked nosplit
   554	// to avoid being preempted; the GC must not stop the goroutine
   555	// between the memmove and the execution of the barriers.
   556	//
   557	// The heap bitmap is not maintained for allocations containing
   558	// no pointers at all; any caller of heapBitsBulkBarrier must first
   559	// make sure the underlying allocation contains pointers, usually
   560	// by checking typ.kind&kindNoPointers.
   561	//
   562	//go:nosplit
   563	func heapBitsBulkBarrier(p, size uintptr) {
   564		if (p|size)&(sys.PtrSize-1) != 0 {
   565			throw("heapBitsBulkBarrier: unaligned arguments")
   566		}
   567		if !writeBarrier.needed {
   568			return
   569		}
   570		if !inheap(p) {
   571			// If p is on the stack and in a higher frame than the
   572			// caller, we either need to execute write barriers on
   573			// it (which is what happens for normal stack writes
   574			// through pointers to higher frames), or we need to
   575			// force the mark termination stack scan to scan the
   576			// frame containing p.
   577			//
   578			// Executing write barriers on p is complicated in the
   579			// general case because we either need to unwind the
   580			// stack to get the stack map, or we need the type's
   581			// bitmap, which may be a GC program.
   582			//
   583			// Hence, we opt for forcing the re-scan to scan the
   584			// frame containing p, which we can do by simply
   585			// unwinding the stack barriers between the current SP
   586			// and p's frame.
   587			gp := getg().m.curg
   588			if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
   589				// Run on the system stack to give it more
   590				// stack space.
   591				systemstack(func() {
   592					gcUnwindBarriers(gp, p)
   593				})
   594				return
   595			}
   596	
   597			// If p is a global, use the data or BSS bitmaps to
   598			// execute write barriers.
   599			for datap := &firstmoduledata; datap != nil; datap = datap.next {
   600				if datap.data <= p && p < datap.edata {
   601					bulkBarrierBitmap(p, size, p-datap.data, datap.gcdatamask.bytedata)
   602					return
   603				}
   604			}
   605			for datap := &firstmoduledata; datap != nil; datap = datap.next {
   606				if datap.bss <= p && p < datap.ebss {
   607					bulkBarrierBitmap(p, size, p-datap.bss, datap.gcbssmask.bytedata)
   608					return
   609				}
   610			}
   611			return
   612		}
   613	
   614		h := heapBitsForAddr(p)
   615		for i := uintptr(0); i < size; i += sys.PtrSize {
   616			if h.isPointer() {
   617				x := (*uintptr)(unsafe.Pointer(p + i))
   618				writebarrierptr_nostore(x, *x)
   619			}
   620			h = h.next()
   621		}
   622	}
   623	
   624	// bulkBarrierBitmap executes write barriers for [p, p+size) using a
   625	// 1-bit pointer bitmap. p is assumed to start maskOffset bytes into
   626	// the data covered by the bitmap in bits.
   627	//
   628	// This is used by heapBitsBulkBarrier for writes to data and BSS.
   629	//
   630	//go:nosplit
   631	func bulkBarrierBitmap(p, size, maskOffset uintptr, bits *uint8) {
   632		word := maskOffset / sys.PtrSize
   633		bits = addb(bits, word/8)
   634		mask := uint8(1) << (word % 8)
   635	
   636		for i := uintptr(0); i < size; i += sys.PtrSize {
   637			if mask == 0 {
   638				bits = addb(bits, 1)
   639				if *bits == 0 {
   640					// Skip 8 words.
   641					i += 7 * sys.PtrSize
   642					continue
   643				}
   644				mask = 1
   645			}
   646			if *bits&mask != 0 {
   647				x := (*uintptr)(unsafe.Pointer(p + i))
   648				writebarrierptr_nostore(x, *x)
   649			}
   650			mask <<= 1
   651		}
   652	}
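
// Editorial note on the loop above: when an entire bitmap byte is zero, the
// eight words it covers are all scalars, so the loop adds 7*sys.PtrSize to i
// and lets the loop increment supply the eighth word, skipping the whole
// byte in one step.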
   653	
   654	// typeBitsBulkBarrier executes writebarrierptr_nostore
   655	// for every pointer slot in the memory range [p, p+size),
   656	// using the type bitmap to locate those pointer slots.
   657	// The type typ must correspond exactly to [p, p+size).
   658	// This executes the write barriers necessary after a copy.
   659	// Both p and size must be pointer-aligned.
   660	// The type typ must have a plain bitmap, not a GC program.
   661	// The only use of this function is in channel sends, and the
   662	// 64 kB channel element limit takes care of this for us.
   663	//
   664	// Must not be preempted because it typically runs right after memmove,
   665	// and the GC must not complete between those two.
   666	//
   667	//go:nosplit
   668	func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
   669		if typ == nil {
   670			throw("runtime: typeBitsBulkBarrier without type")
   671		}
   672		if typ.size != size {
   673			println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
   674			throw("runtime: invalid typeBitsBulkBarrier")
   675		}
   676		if typ.kind&kindGCProg != 0 {
   677			println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
   678			throw("runtime: invalid typeBitsBulkBarrier")
   679		}
   680		if !writeBarrier.needed {
   681			return
   682		}
   683		ptrmask := typ.gcdata
   684		var bits uint32
   685		for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
   686			if i&(sys.PtrSize*8-1) == 0 {
   687				bits = uint32(*ptrmask)
   688				ptrmask = addb(ptrmask, 1)
   689			} else {
   690				bits = bits >> 1
   691			}
   692			if bits&1 != 0 {
   693				x := (*uintptr)(unsafe.Pointer(p + i))
   694				writebarrierptr_nostore(x, *x)
   695			}
   696		}
   697	}
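
// Editorial sketch of the calling pattern required above (hypothetical code;
// per the comment, the real call site is the channel send path):
//
//	memmove(dst, src, t.size)
//	typeBitsBulkBarrier(t, uintptr(dst), t.size)
//
// The barrier must run immediately after the memmove, with no preemption in
// between.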
   698	
   699	// The methods operating on spans all require that h has been returned
   700	// by heapBitsForSpan and that size, n, total are the span layout description
   701	// returned by the mspan's layout method.
   702	// If total > size*n, it means that there is extra leftover memory in the span,
   703	// usually due to rounding.
   704	//
   705	// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
   706	
   707	// initSpan initializes the heap bitmap for a span.
   708	// It clears all checkmark bits.
   709	// If this is a span of pointer-sized objects, it initializes all
   710	// words to pointer/scan.
   711	// Otherwise, it initializes all words to scalar/dead.
   712	func (h heapBits) initSpan(s *mspan) {
   713		size, n, total := s.layout()
   714	
   715		// Init the markbit structures
   716		s.freeindex = 0
   717		s.allocCache = ^uint64(0) // all 1s indicating all free.
   718		s.nelems = n
   719		s.allocBits = nil
   720		s.gcmarkBits = nil
   721		s.gcmarkBits = newMarkBits(s.nelems)
   722		s.allocBits = newAllocBits(s.nelems)
   723	
   724		// Clear bits corresponding to objects.
   725		if total%heapBitmapScale != 0 {
   726			throw("initSpan: unaligned length")
   727		}
   728		nbyte := total / heapBitmapScale
   729		if sys.PtrSize == 8 && size == sys.PtrSize {
   730			end := h.bitp
   731			bitp := subtractb(end, nbyte-1)
   732			for {
   733				*bitp = bitPointerAll | bitMarkedAll
   734				if bitp == end {
   735					break
   736				}
   737				bitp = add1(bitp)
   738			}
   739			return
   740		}
   741		memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
   742	}
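
// Editorial example: for a span of pointer-sized objects on a 64-bit system,
// the loop above stores bitPointerAll|bitMarkedAll == 0x0f|0xf0 == 0xff into
// every bitmap byte, marking all four words covered by each byte as
// pointer/scan.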
   743	
   744	// initCheckmarkSpan initializes a span for being checkmarked.
   745	// It clears the checkmark bits, which are set to 1 in normal operation.
   746	func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
   747		// On 32-bit, sys.PtrSize == 8 is a compile-time false constant, so this code is eliminated entirely.
   748		if sys.PtrSize == 8 && size == sys.PtrSize {
   749			// Checkmark bit is type bit, bottom bit of every 2-bit entry.
   750			// Only possible on 64-bit system, since minimum size is 8.
   751			// Must clear type bit (checkmark bit) of every word.
   752			// The type bit is the lower of every two-bit pair.
   753			bitp := h.bitp
   754			for i := uintptr(0); i < n; i += 4 {
   755				*bitp &^= bitPointerAll
   756				bitp = subtract1(bitp)
   757			}
   758			return
   759		}
   760		for i := uintptr(0); i < n; i++ {
   761			*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
   762			h = h.forward(size / sys.PtrSize)
   763		}
   764	}
   765	
   766	// clearCheckmarkSpan undoes all the checkmarking in a span.
   767	// The actual checkmark bits are ignored, so the only work to do
   768	// is to fix the pointer bits. (Pointer bits are ignored by scanobject
   769	// but consulted by typedmemmove.)
   770	func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
   771		// On 32-bit, sys.PtrSize == 8 is a compile-time false constant, so this code is eliminated entirely.
   772		if sys.PtrSize == 8 && size == sys.PtrSize {
   773			// Checkmark bit is type bit, bottom bit of every 2-bit entry.
   774			// Only possible on 64-bit system, since minimum size is 8.
   775			// Must clear type bit (checkmark bit) of every word.
   776			// The type bit is the lower of every two-bit pair.
   777			bitp := h.bitp
   778			for i := uintptr(0); i < n; i += 4 {
   779				*bitp |= bitPointerAll
   780				bitp = subtract1(bitp)
   781			}
   782		}
   783	}
   784	
   785	// oneBitCount is indexed by byte and produces the
   786	// number of 1 bits in that byte. For example, 128 has one bit set,
   787	// so oneBitCount[128] holds 1.
   788	var oneBitCount = [256]uint8{
   789		0, 1, 1, 2, 1, 2, 2, 3,
   790		1, 2, 2, 3, 2, 3, 3, 4,
   791		1, 2, 2, 3, 2, 3, 3, 4,
   792		2, 3, 3, 4, 3, 4, 4, 5,
   793		1, 2, 2, 3, 2, 3, 3, 4,
   794		2, 3, 3, 4, 3, 4, 4, 5,
   795		2, 3, 3, 4, 3, 4, 4, 5,
   796		3, 4, 4, 5, 4, 5, 5, 6,
   797		1, 2, 2, 3, 2, 3, 3, 4,
   798		2, 3, 3, 4, 3, 4, 4, 5,
   799		2, 3, 3, 4, 3, 4, 4, 5,
   800		3, 4, 4, 5, 4, 5, 5, 6,
   801		2, 3, 3, 4, 3, 4, 4, 5,
   802		3, 4, 4, 5, 4, 5, 5, 6,
   803		3, 4, 4, 5, 4, 5, 5, 6,
   804		4, 5, 5, 6, 5, 6, 6, 7,
   805		1, 2, 2, 3, 2, 3, 3, 4,
   806		2, 3, 3, 4, 3, 4, 4, 5,
   807		2, 3, 3, 4, 3, 4, 4, 5,
   808		3, 4, 4, 5, 4, 5, 5, 6,
   809		2, 3, 3, 4, 3, 4, 4, 5,
   810		3, 4, 4, 5, 4, 5, 5, 6,
   811		3, 4, 4, 5, 4, 5, 5, 6,
   812		4, 5, 5, 6, 5, 6, 6, 7,
   813		2, 3, 3, 4, 3, 4, 4, 5,
   814		3, 4, 4, 5, 4, 5, 5, 6,
   815		3, 4, 4, 5, 4, 5, 5, 6,
   816		4, 5, 5, 6, 5, 6, 6, 7,
   817		3, 4, 4, 5, 4, 5, 5, 6,
   818		4, 5, 5, 6, 5, 6, 6, 7,
   819		4, 5, 5, 6, 5, 6, 6, 7,
   820		5, 6, 6, 7, 6, 7, 7, 8}
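
// examplePopCount16 is an editorial sketch (a hypothetical helper, not part
// of the runtime) showing how oneBitCount composes for wider values: the
// population count of a 16-bit value is the sum of the table entries for its
// two bytes.
func examplePopCount16(x uint16) int {
	return int(oneBitCount[x&0xff]) + int(oneBitCount[x>>8])
}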
   821	
   822	// countFree runs through the mark bits in a span and counts the number of free objects
   823	// in the span.
   824	// TODO:(rlh) Use popcount intrinsic.
   825	func (s *mspan) countFree() int {
   826		count := 0
   827		maxIndex := s.nelems / 8
   828		for i := uintptr(0); i < maxIndex; i++ {
   829			mrkBits := *addb(s.gcmarkBits, i)
   830			count += int(oneBitCount[mrkBits])
   831		}
   832		if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
   833			mrkBits := *addb(s.gcmarkBits, maxIndex)
   834			mask := uint8((1 << bitsInLastByte) - 1)
   835			bits := mrkBits & mask
   836			count += int(oneBitCount[bits])
   837		}
   838		return int(s.nelems) - count
   839	}
   840	
   841	// heapBitsSetType records that the new allocation [x, x+size)
   842	// holds in [x, x+dataSize) one or more values of type typ.
   843	// (The number of values is given by dataSize / typ.size.)
   844	// If dataSize < size, the fragment [x+dataSize, x+size) is
   845	// recorded as non-pointer data.
   846	// It is known that the type has pointers somewhere;
   847	// malloc does not call heapBitsSetType when there are no pointers,
   848	// because all free objects are marked as noscan during
   849	// heapBitsSweepSpan.
   850	//
   851	// There can only be one allocation from a given span active at a time,
   852	// and the bitmap for a span always falls on byte boundaries,
   853	// so there are no write-write races for access to the heap bitmap.
   854	// Hence, heapBitsSetType can access the bitmap without atomics.
   855	//
   856	// There can be read-write races between heapBitsSetType and things
   857	// that read the heap bitmap like scanobject. However, since
   858	// heapBitsSetType is only used for objects that have not yet been
   859	// made reachable, readers will ignore bits being modified by this
   860	// function. This does mean this function cannot transiently modify
   861	// bits that belong to neighboring objects. Also, on weakly-ordered
   862	// machines, callers must execute a store/store (publication) barrier
   863	// between calling this function and making the object reachable.
   864	//
   865	// TODO: This still has atomic accesses left over from when it could
   866	// race with GC accessing mark bits in the bitmap. Remove these.
   867	func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
   868		const doubleCheck = false // slow but helpful; enable to test modifications to this code
   869	
   870		// dataSize is always size rounded up to the next malloc size class,
   871		// except in the case of allocating a defer block, in which case
   872		// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
   873		// arbitrarily larger.
   874		//
   875		// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
   876		// assume that dataSize == size without checking it explicitly.
   877	
   878		if sys.PtrSize == 8 && size == sys.PtrSize {
   879		// Since it's one word and it has pointers, it must be a pointer.
   880			// In general we'd need an atomic update here if the
   881			// concurrent GC were marking objects in this span,
   882			// because each bitmap byte describes 3 other objects
   883			// in addition to the one being allocated.
   884			// However, since all allocated one-word objects are pointers
   885			// (non-pointers are aggregated into tinySize allocations),
   886			// initSpan sets the pointer bits for us. Nothing to do here.
   887			if doubleCheck {
   888				h := heapBitsForAddr(x)
   889				if !h.isPointer() {
   890					throw("heapBitsSetType: pointer bit missing")
   891				}
   892				if !h.morePointers() {
   893					throw("heapBitsSetType: scan bit missing")
   894				}
   895			}
   896			return
   897		}
   898	
   899		h := heapBitsForAddr(x)
   900		ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
   901	
   902	// Heap bitmap bits for a 2-word object are only 4 bits,
   903	// so they are shared with the bits for neighboring objects; use atomic updates.
   904		// This is called out as a special case primarily for 32-bit systems,
   905		// so that on 32-bit systems the code below can assume all objects
   906		// are 4-word aligned (because they're all 16-byte aligned).
   907		if size == 2*sys.PtrSize {
   908			if typ.size == sys.PtrSize {
   909				// We're allocating a block big enough to hold two pointers.
   910				// On 64-bit, that means the actual object must be two pointers,
   911				// or else we'd have used the one-pointer-sized block.
   912				// On 32-bit, however, this is the 8-byte block, the smallest one.
   913				// So it could be that we're allocating one pointer and this was
   914				// just the smallest block available. Distinguish by checking dataSize.
   915				// (In general the number of instances of typ being allocated is
   916				// dataSize/typ.size.)
   917				if sys.PtrSize == 4 && dataSize == sys.PtrSize {
   918					// 1 pointer object. On 32-bit machines clear the bit for the
   919					// unused second word.
   920					if gcphase == _GCoff {
   921						*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
   922						*h.bitp |= (bitPointer | bitMarked) << h.shift
   923					} else {
   924						atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
   925						atomic.Or8(h.bitp, (bitPointer|bitMarked)<<h.shift)
   926					}
   927				} else {
   928					// 2-element slice of pointer.
   929					if gcphase == _GCoff {
   930						*h.bitp |= (bitPointer | bitMarked | bitPointer<<heapBitsShift) << h.shift
   931					} else {
   932						atomic.Or8(h.bitp, (bitPointer|bitMarked|bitPointer<<heapBitsShift)<<h.shift)
   933					}
   934				}
   935				return
   936			}
   937			// Otherwise typ.size must be 2*sys.PtrSize,
   938			// and typ.kind&kindGCProg == 0.
   939			if doubleCheck {
   940				if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
   941					print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
   942					throw("heapBitsSetType")
   943				}
   944			}
   945			b := uint32(*ptrmask)
   946			hb := (b & 3) | bitMarked
   947			if gcphase == _GCoff {
   948				// bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
   949				// 110011 is shifted h.shift and complemented.
   950				// This clears out the bits that are about to be
   951			// ored into *h.bitp in the next instructions.
   952				*h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
   953				*h.bitp |= uint8(hb << h.shift)
   954			} else {
   955				// TODO:(rlh) since the GC is not concurrently setting the
   956				// mark bits in the heap map anymore and malloc
   957				// owns the span we are allocating in why does this have
   958				// to be atomic?
   959	
   960				atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
   961				atomic.Or8(h.bitp, uint8(hb<<h.shift))
   962			}
   963			return
   964		}
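
	// Editorial worked example for the two-word case above: when both words
	// of a two-pointer type hold pointers, *ptrmask has its low two bits set,
	// so hb == (b&3)|bitMarked == 0b010011: pointer bits for both words plus
	// the scan bit on the first word. The second word's high bit is the
	// checkmark and is deliberately left clear.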
   965	
   966		// Copy from 1-bit ptrmask into 2-bit bitmap.
   967		// The basic approach is to use a single uintptr as a bit buffer,
   968		// alternating between reloading the buffer and writing bitmap bytes.
   969		// In general, one load can supply two bitmap byte writes.
   970		// This is a lot of lines of code, but it compiles into relatively few
   971		// machine instructions.
   972	
   973		var (
   974			// Ptrmask input.
   975			p     *byte   // last ptrmask byte read
   976			b     uintptr // ptrmask bits already loaded
   977			nb    uintptr // number of bits in b at next read
   978			endp  *byte   // final ptrmask byte to read (then repeat)
   979			endnb uintptr // number of valid bits in *endp
   980			pbits uintptr // alternate source of bits
   981	
   982			// Heap bitmap output.
   983			w     uintptr // words processed
   984			nw    uintptr // number of words to process
   985			hbitp *byte   // next heap bitmap byte to write
   986			hb    uintptr // bits being prepared for *hbitp
   987		)
   988	
   989		hbitp = h.bitp
   990	
   991		// Handle GC program. Delayed until this part of the code
   992		// so that we can use the same double-checking mechanism
   993		// as the 1-bit case. Nothing above could have encountered
   994		// GC programs: the cases were all too small.
   995		if typ.kind&kindGCProg != 0 {
   996			heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
   997			if doubleCheck {
   998				// Double-check the heap bits written by GC program
   999				// by running the GC program to create a 1-bit pointer mask
  1000				// and then jumping to the double-check code below.
  1001				// This doesn't catch bugs shared between the 1-bit and 4-bit
  1002				// GC program execution, but it does catch mistakes specific
  1003				// to just one of those and bugs in heapBitsSetTypeGCProg's
  1004				// implementation of arrays.
  1005				lock(&debugPtrmask.lock)
  1006				if debugPtrmask.data == nil {
  1007					debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
  1008				}
  1009				ptrmask = debugPtrmask.data
  1010				runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
  1011				goto Phase4
  1012			}
  1013			return
  1014		}
  1015	
  1016		// Note about sizes:
  1017		//
  1018		// typ.size is the number of words in the object,
  1019		// and typ.ptrdata is the number of words in the prefix
  1020		// of the object that contains pointers. That is, the final
  1021		// typ.size - typ.ptrdata words contain no pointers.
  1022		// This allows optimization of a common pattern where
  1023		// an object has a small header followed by a large scalar
  1024		// buffer. If we know the pointers are over, we don't have
  1025		// to scan the buffer's heap bitmap at all.
  1026		// The 1-bit ptrmasks are sized to contain only bits for
  1027		// the typ.ptrdata prefix, zero padded out to a full byte
  1028		// of bitmap. This code sets nw (below) so that heap bitmap
  1029		// bits are only written for the typ.ptrdata prefix; if there is
  1030		// more room in the allocated object, the next heap bitmap
  1031		// entry is a 00, indicating that there are no more pointers
  1032		// to scan. So only the ptrmask for the ptrdata bytes is needed.
  1033		//
  1034		// Replicated copies are not as nice: if there is an array of
  1035		// objects with scalar tails, all but the last tail does have to
  1036		// be initialized, because there is no way to say "skip forward".
  1037		// However, because of the possibility of a repeated type with
  1038		// size not a multiple of 4 pointers (one heap bitmap byte),
  1039		// the code already must handle the last ptrmask byte specially
  1040		// by treating it as containing only the bits for endnb pointers,
  1041		// where endnb <= 4. We represent large scalar tails that must
  1042		// be expanded in the replication by setting endnb larger than 4.
  1043		// This will have the effect of reading many bits out of b,
  1044		// but once the real bits are shifted out, b will supply as many
  1045		// zero bits as we try to read, which is exactly what we need.
  1046	
  1047		p = ptrmask
  1048		if typ.size < dataSize {
  1049			// Filling in bits for an array of typ.
  1050			// Set up for repetition of ptrmask during main loop.
  1051			// Note that ptrmask describes only a prefix of the object: the bits for the typ.ptrdata pointer-bearing words.
  1052			const maxBits = sys.PtrSize*8 - 7
  1053			if typ.ptrdata/sys.PtrSize <= maxBits {
  1054				// Entire ptrmask fits in uintptr with room for a byte fragment.
  1055				// Load into pbits and never read from ptrmask again.
  1056				// This is especially important when the ptrmask has
  1057				// fewer than 8 bits in it; otherwise the reload in the middle
  1058				// of the Phase 2 loop would itself need to loop to gather
  1059				// at least 8 bits.
  1060	
  1061				// Accumulate ptrmask into b.
  1062				// ptrmask is sized to describe only typ.ptrdata, but we record
  1063				// it as describing typ.size bytes, since all the high bits are zero.
  1064				nb = typ.ptrdata / sys.PtrSize
  1065				for i := uintptr(0); i < nb; i += 8 {
  1066					b |= uintptr(*p) << i
  1067					p = add1(p)
  1068				}
  1069				nb = typ.size / sys.PtrSize
  1070	
  1071				// Replicate ptrmask to fill entire pbits uintptr.
  1072				// Doubling and truncating is fewer steps than
  1073				// iterating by nb each time. (nb could be 1.)
  1074				// Since we loaded typ.ptrdata/sys.PtrSize bits
  1075				// but are pretending to have typ.size/sys.PtrSize,
  1076				// there might be no replication necessary/possible.
  1077				pbits = b
  1078				endnb = nb
  1079				if nb+nb <= maxBits {
  1080					for endnb <= sys.PtrSize*8 {
  1081						pbits |= pbits << endnb
  1082						endnb += endnb
  1083					}
  1084					// Truncate to a multiple of original ptrmask.
  1085					endnb = maxBits / nb * nb
  1086					pbits &= 1<<endnb - 1
  1087					b = pbits
  1088					nb = endnb
  1089				}
  1090	
  1091				// Clear p and endp as sentinel for using pbits.
  1092				// Checked during Phase 2 loop.
  1093				p = nil
  1094				endp = nil
  1095			} else {
  1096				// Ptrmask is larger. Read it multiple times.
  1097				n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
  1098				endp = addb(ptrmask, n)
  1099				endnb = typ.size/sys.PtrSize - n*8
  1100			}
  1101		}
  1102		if p != nil {
  1103			b = uintptr(*p)
  1104			p = add1(p)
  1105			nb = 8
  1106		}
  1107	
  1108		if typ.size == dataSize {
  1109			// Single entry: can stop once we reach the non-pointer data.
  1110			nw = typ.ptrdata / sys.PtrSize
  1111		} else {
  1112			// Repeated instances of typ in an array.
  1113			// Have to process first N-1 entries in full, but can stop
  1114			// once we reach the non-pointer data in the final entry.
  1115			nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
  1116		}
  1117		if nw == 0 {
  1118			// No pointers! Caller was supposed to check.
  1119			println("runtime: invalid type ", typ.string())
  1120			throw("heapBitsSetType: called with non-pointer type")
  1121			return
  1122		}
  1123		if nw < 2 {
  1124			// Must write at least 2 words, because the "no scan"
  1125			// encoding doesn't take effect until the third word.
  1126			nw = 2
  1127		}
  1128	
  1129		// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
  1130		// The leading byte is special because it contains the bits for word 1,
  1131		// which does not have the marked bits set.
  1132		// The leading half-byte is special because it's a half a byte and must be
  1133		// manipulated atomically.
  1134		switch {
  1135		default:
  1136			throw("heapBitsSetType: unexpected shift")
  1137	
  1138		case h.shift == 0:
  1139			// Ptrmask and heap bitmap are aligned.
  1140			// Handle first byte of bitmap specially.
  1141			//
  1142			// The first byte we write out covers the first four
  1143			// words of the object. The scan/dead bit on the first
  1144			// word must be set to scan since there are pointers
  1145			// somewhere in the object. The scan/dead bit on the
  1146			// second word is the checkmark, so we don't set it.
  1147			// In all following words, we set the scan/dead
  1148		// appropriately to indicate that the object continues
  1149			// to the next 2-bit entry in the bitmap.
  1150			//
  1151			// TODO: It doesn't matter if we set the checkmark, so
  1152			// maybe this case isn't needed any more.
  1153			hb = b & bitPointerAll
  1154			hb |= bitMarked | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
  1155			if w += 4; w >= nw {
  1156				goto Phase3
  1157			}
  1158			*hbitp = uint8(hb)
  1159			hbitp = subtract1(hbitp)
  1160			b >>= 4
  1161			nb -= 4
  1162	
  1163		case sys.PtrSize == 8 && h.shift == 2:
  1164			// Ptrmask and heap bitmap are misaligned.
  1165			// The bits for the first two words are in a byte shared with another object
  1166			// and must be updated atomically.
  1167			// NOTE(rsc): The atomic here may not be necessary.
  1168			// We took care of 1-word and 2-word objects above,
  1169			// so this is at least a 6-word object, so our start bits
  1170			// are shared only with the type bits of another object,
  1171			// not with its mark bit. Since there is only one allocation
  1172			// from a given span at a time, we should be able to set
  1173			// these bits non-atomically. Not worth the risk right now.
  1174			hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
  1175			// This is not noscan, so set the scan bit in the
  1176			// first word.
  1177			hb |= bitMarked << (2 * heapBitsShift)
  1178			b >>= 2
  1179			nb -= 2
  1180			// Note: no bitMarked for second word because that's
  1181			// the checkmark.
  1182			if gcphase == _GCoff {
  1183				*hbitp &^= uint8((bitPointer | bitMarked | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
  1184				*hbitp |= uint8(hb)
  1185			} else {
  1186				atomic.And8(hbitp, ^(uint8(bitPointer|bitMarked|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
  1187				atomic.Or8(hbitp, uint8(hb))
  1188			}
  1189			hbitp = subtract1(hbitp)
  1190			if w += 2; w >= nw {
  1191				// We know that there is more data, because we handled 2-word objects above.
  1192				// This must be at least a 6-word object. If we're out of pointer words,
  1193				// mark no scan in next bitmap byte and finish.
  1194				hb = 0
  1195				w += 4
  1196				goto Phase3
  1197			}
  1198		}
  1199	
  1200		// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
  1201		// The loop computes the bits for that last write but does not execute the write;
  1202		// it leaves the bits in hb for processing by phase 3.
  1203		// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
  1204		// use in the first half of the loop right now, and then we only adjust nb explicitly
  1205		// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
  1206		nb -= 4
  1207		for {
  1208			// Emit bitmap byte.
  1209			// b has at least nb+4 bits, with one exception:
  1210			// if w+4 >= nw, then b has only nw-w bits,
  1211			// but we'll stop at the break and then truncate
  1212			// appropriately in Phase 3.
  1213			hb = b & bitPointerAll
  1214			hb |= bitMarkedAll
  1215			if w += 4; w >= nw {
  1216				break
  1217			}
  1218			*hbitp = uint8(hb)
  1219			hbitp = subtract1(hbitp)
  1220			b >>= 4
  1221	
  1222			// Load more bits. b has nb right now.
  1223			if p != endp {
  1224				// Fast path: keep reading from ptrmask.
  1225				// nb unmodified: we just loaded 8 bits,
  1226				// and the next iteration will consume 8 bits,
  1227				// leaving us with the same nb the next time we're here.
  1228				if nb < 8 {
  1229					b |= uintptr(*p) << nb
  1230					p = add1(p)
  1231				} else {
  1232					// Reduce the number of bits in b.
  1233					// This is important if we skipped
  1234					// over a scalar tail, since nb could
  1235					// be larger than the bit width of b.
  1236					nb -= 8
  1237				}
  1238			} else if p == nil {
  1239				// Almost as fast path: track bit count and refill from pbits.
  1240				// For short repetitions.
  1241				if nb < 8 {
  1242					b |= pbits << nb
  1243					nb += endnb
  1244				}
  1245				nb -= 8 // for next iteration
  1246			} else {
  1247				// Slow path: reached end of ptrmask.
  1248				// Process final partial byte and rewind to start.
  1249				b |= uintptr(*p) << nb
  1250				nb += endnb
  1251				if nb < 8 {
  1252					b |= uintptr(*ptrmask) << nb
  1253					p = add1(ptrmask)
  1254				} else {
  1255					nb -= 8
  1256					p = ptrmask
  1257				}
  1258			}
  1259	
  1260			// Emit bitmap byte.
  1261			hb = b & bitPointerAll
  1262			hb |= bitMarkedAll
  1263			if w += 4; w >= nw {
  1264				break
  1265			}
  1266			*hbitp = uint8(hb)
  1267			hbitp = subtract1(hbitp)
  1268			b >>= 4
  1269		}
  1270	
  1271	Phase3:
  1272		// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
  1273		if w > nw {
  1274			// Counting the 4 entries in hb not yet written to memory,
  1275			// there are more entries than possible pointer slots.
  1276			// Discard the excess entries (can't be more than 3).
  1277			mask := uintptr(1)<<(4-(w-nw)) - 1
  1278			hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
  1279		}
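
	// Editorial example: if w == nw+2, then mask == 1<<2 - 1 == 3 and
	// mask|mask<<4 == 0x33, which keeps the pointer bits (low nibble) and
	// scan bits (high nibble) of the two remaining entries and clears the
	// excess two.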
  1280	
  1281		// Change nw from counting possibly-pointer words to total words in allocation.
  1282		nw = size / sys.PtrSize
  1283	
  1284		// Write whole bitmap bytes.
  1285		// The first is hb, the rest are zero.
  1286		if w <= nw {
  1287			*hbitp = uint8(hb)
  1288			hbitp = subtract1(hbitp)
  1289			hb = 0 // for possible final half-byte below
  1290			for w += 4; w <= nw; w += 4 {
  1291				*hbitp = 0
  1292				hbitp = subtract1(hbitp)
  1293			}
  1294		}
  1295	
  1296		// Write final partial bitmap byte if any.
  1297		// We know w > nw, or else we'd still be in the loop above.
  1298		// It can be bigger only due to the 4 entries in hb that it counts.
  1299		// If w == nw+4 then there's nothing left to do: we wrote all nw entries
  1300		// and can discard the 4 sitting in hb.
  1301	// But if w == nw+2, we need to write the first two entries in hb.
  1302		// The byte is shared with the next object so we may need an atomic.
  1303		if w == nw+2 {
  1304			if gcphase == _GCoff {
  1305				*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
  1306			} else {
  1307				atomic.And8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
  1308				atomic.Or8(hbitp, uint8(hb))
  1309			}
  1310		}
  1311	
  1312	Phase4:
  1313		// Phase 4: all done, but perhaps double check.
  1314		if doubleCheck {
  1315			end := heapBitsForAddr(x + size)
  1316			if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
  1317				println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
  1318				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
  1319				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
  1320				h0 := heapBitsForAddr(x)
  1321				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
  1322				print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
  1323				throw("bad heapBitsSetType")
  1324			}
  1325	
  1326			// Double-check that bits to be written were written correctly.
  1327			// Does not check that other bits were not written, unfortunately.
  1328			h := heapBitsForAddr(x)
  1329			nptr := typ.ptrdata / sys.PtrSize
  1330			ndata := typ.size / sys.PtrSize
  1331			count := dataSize / typ.size
  1332			totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
  1333			for i := uintptr(0); i < size/sys.PtrSize; i++ {
  1334				j := i % ndata
  1335				var have, want uint8
  1336				have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
  1337				if i >= totalptr {
  1338					want = 0 // deadmarker
  1339					if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
  1340						want = bitMarked
  1341					}
  1342				} else {
  1343					if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
  1344						want |= bitPointer
  1345					}
  1346					if i != 1 {
  1347						want |= bitMarked
  1348					} else {
  1349						have &^= bitMarked
  1350					}
  1351				}
  1352				if have != want {
  1353					println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
  1354					print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
  1355					print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
  1356					print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
  1357					h0 := heapBitsForAddr(x)
  1358					print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
  1359					print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
  1360					print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
  1361					println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
  1362					if typ.kind&kindGCProg != 0 {
  1363						println("GC program:")
  1364						dumpGCProg(addb(typ.gcdata, 4))
  1365					}
  1366					throw("bad heapBitsSetType")
  1367				}
  1368				h = h.next()
  1369			}
  1370			if ptrmask == debugPtrmask.data {
  1371				unlock(&debugPtrmask.lock)
  1372			}
  1373		}
  1374	}
  1375	
  1376	// heapBitsSetTypeNoScan marks x as noscan by setting the first word
  1377	// of x in the heap bitmap to scalar/dead.
  1378	func heapBitsSetTypeNoScan(x uintptr) {
   1379		h := heapBitsForAddr(x)
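	// Clear both the pointer bit and the mark bit for x's first word.
	// With h.shift == 2, for instance, this clears bits 1<<2 and 1<<6,
	// leaving the entries for neighboring words in the same byte intact.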
  1380		*h.bitp &^= (bitPointer | bitMarked) << h.shift
  1381	}
  1382	
  1383	var debugPtrmask struct {
  1384		lock mutex
  1385		data *byte
  1386	}
  1387	
  1388	// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
  1389	// progSize is the size of the memory described by the program.
   1390	// elemSize is the size of the element; the GC program describes a prefix of it.
  1391	// dataSize is the total size of the intended data, a multiple of elemSize.
  1392	// allocSize is the total size of the allocated memory.
  1393	//
  1394	// GC programs are only used for large allocations.
  1395	// heapBitsSetType requires that allocSize is a multiple of 4 words,
  1396	// so that the relevant bitmap bytes are not shared with surrounding
  1397	// objects and need not be accessed with atomic instructions.
  1398	func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
  1399		if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
  1400			// Alignment will be wrong.
  1401			throw("heapBitsSetTypeGCProg: small allocation")
  1402		}
  1403		var totalBits uintptr
  1404		if elemSize == dataSize {
  1405			totalBits = runGCProg(prog, nil, h.bitp, 2)
  1406			if totalBits*sys.PtrSize != progSize {
  1407				println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
  1408				throw("heapBitsSetTypeGCProg: unexpected bit count")
  1409			}
  1410		} else {
  1411			count := dataSize / elemSize
  1412	
  1413			// Piece together program trailer to run after prog that does:
  1414			//	literal(0)
  1415			//	repeat(1, elemSize-progSize-1) // zeros to fill element size
  1416			//	repeat(elemSize, count-1) // repeat that element for count
  1417			// This zero-pads the data remaining in the first element and then
  1418			// repeats that first element to fill the array.
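		// For example (hypothetical sizes): with 8-byte words,
		// elemSize = 48 (6 words), progSize = 16 (2 words), and
		// count = 4, the trailer bytes are:
		//	0x01 0x00       literal(0): one zero bit
		//	0x81 0x03       repeat(1, 3): three more zero bits
		//	0x80 0x06 0x03  repeat(6, 3): whole element, 3 more times
		//	0x00            stop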
  1419			var trailer [40]byte // 3 varints (max 10 each) + some bytes
  1420			i := 0
  1421			if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
  1422				// literal(0)
  1423				trailer[i] = 0x01
  1424				i++
  1425				trailer[i] = 0
  1426				i++
  1427				if n > 1 {
  1428					// repeat(1, n-1)
  1429					trailer[i] = 0x81
  1430					i++
  1431					n--
  1432					for ; n >= 0x80; n >>= 7 {
  1433						trailer[i] = byte(n | 0x80)
  1434						i++
  1435					}
  1436					trailer[i] = byte(n)
  1437					i++
  1438				}
  1439			}
  1440			// repeat(elemSize/ptrSize, count-1)
  1441			trailer[i] = 0x80
  1442			i++
  1443			n := elemSize / sys.PtrSize
  1444			for ; n >= 0x80; n >>= 7 {
  1445				trailer[i] = byte(n | 0x80)
  1446				i++
  1447			}
  1448			trailer[i] = byte(n)
  1449			i++
  1450			n = count - 1
  1451			for ; n >= 0x80; n >>= 7 {
  1452				trailer[i] = byte(n | 0x80)
  1453				i++
  1454			}
  1455			trailer[i] = byte(n)
  1456			i++
  1457			trailer[i] = 0
  1458			i++
  1459	
  1460			runGCProg(prog, &trailer[0], h.bitp, 2)
  1461	
  1462			// Even though we filled in the full array just now,
  1463			// record that we only filled in up to the ptrdata of the
  1464			// last element. This will cause the code below to
  1465			// memclr the dead section of the final array element,
  1466			// so that scanobject can stop early in the final element.
  1467			totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
  1468		}
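	// The program wrote (totalBits+3)/4 bitmap bytes, moving down from
	// h.bitp (each byte holds 4 entries). Clear the remaining bytes that
	// describe the allocation, so the words past totalBits read as dead.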
  1469		endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
  1470		endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
  1471		memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
  1472	}
  1473	
  1474	// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
   1475	// size is the size of the region described by prog, in bytes.
  1476	// The resulting bitvector will have no more than size/sys.PtrSize bits.
  1477	func progToPointerMask(prog *byte, size uintptr) bitvector {
  1478		n := (size/sys.PtrSize + 7) / 8
  1479		x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
  1480		x[len(x)-1] = 0xa1 // overflow check sentinel
  1481		n = runGCProg(prog, nil, &x[0], 1)
  1482		if x[len(x)-1] != 0xa1 {
  1483			throw("progToPointerMask: overflow")
  1484		}
  1485		return bitvector{int32(n), &x[0]}
  1486	}
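
// A minimal sketch of the sentinel idiom above (exampleSentinelGuard and
// its write argument are hypothetical, not runtime APIs): reserve one byte
// past the region the writer may touch, fill it with a known value, and
// verify it afterward to detect overruns.
func exampleSentinelGuard(write func(dst []byte), n uintptr) {
	buf := make([]byte, n+1)
	buf[n] = 0xa1 // sentinel just past the writer's region
	write(buf[:n])
	if buf[n] != 0xa1 {
		throw("sentinel clobbered: writer overflowed its buffer")
	}
}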
  1487	
  1488	// Packed GC pointer bitmaps, aka GC programs.
  1489	//
  1490	// For large types containing arrays, the type information has a
  1491	// natural repetition that can be encoded to save space in the
  1492	// binary and in the memory representation of the type information.
  1493	//
  1494	// The encoding is a simple Lempel-Ziv style bytecode machine
  1495	// with the following instructions:
  1496	//
  1497	//	00000000: stop
  1498	//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
  1499	//	10000000 n c: repeat the previous n bits c times; n, c are varints
  1500	//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
  1501	
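// As an illustration of the instruction set above (exampleDecodeGCProg is
// an editor's sketch, not part of the runtime): a direct decoder that
// expands a program into one byte per 1-bit entry. The hypothetical
// program 0x02 0x01 0x82 0x03 0x00 emits the two bits 1,0 and then
// repeats them three more times, producing 1 0 1 0 1 0 1 0.
func exampleDecodeGCProg(prog []byte) []byte {
	readVarint := func(i int) (int, int) {
		v := 0
		for shift := uint(0); ; shift += 7 {
			x := prog[i]
			i++
			v |= int(x&0x7f) << shift
			if x&0x80 == 0 {
				return v, i
			}
		}
	}
	var out []byte
	for i := 0; ; {
		inst := prog[i]
		i++
		if inst == 0 { // 00000000: stop
			return out
		}
		if inst&0x80 == 0 {
			// 0nnnnnnn: emit n literal bits, low bit of each data byte first.
			n := int(inst)
			for j := 0; j < n; j++ {
				out = append(out, prog[i+j/8]>>uint(j%8)&1)
			}
			i += (n + 7) / 8
			continue
		}
		// 1nnnnnnn [n] c: repeat the previous n bits c more times.
		n := int(inst & 0x7f)
		if n == 0 {
			n, i = readVarint(i)
		}
		var c int
		c, i = readVarint(i)
		prev := out[len(out)-n:]
		for ; c > 0; c-- {
			out = append(out, prev...)
		}
	}
}
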
  1502	// runGCProg executes the GC program prog, and then trailer if non-nil,
  1503	// writing to dst with entries of the given size.
  1504	// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
  1505	// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
  1506	// starting at dst (because the heap bitmap does). In this case, the caller guarantees
  1507	// that only whole bytes in dst need to be written.
  1508	//
  1509	// runGCProg returns the number of 1- or 2-bit entries written to memory.
  1510	func runGCProg(prog, trailer, dst *byte, size int) uintptr {
  1511		dstStart := dst
  1512	
  1513		// Bits waiting to be written to memory.
  1514		var bits uintptr
  1515		var nbits uintptr
  1516	
  1517		p := prog
  1518	Run:
  1519		for {
  1520			// Flush accumulated full bytes.
  1521			// The rest of the loop assumes that nbits <= 7.
  1522			for ; nbits >= 8; nbits -= 8 {
  1523				if size == 1 {
  1524					*dst = uint8(bits)
  1525					dst = add1(dst)
  1526					bits >>= 8
  1527				} else {
  1528					v := bits&bitPointerAll | bitMarkedAll
  1529					*dst = uint8(v)
  1530					dst = subtract1(dst)
  1531					bits >>= 4
  1532					v = bits&bitPointerAll | bitMarkedAll
  1533					*dst = uint8(v)
  1534					dst = subtract1(dst)
  1535					bits >>= 4
  1536				}
  1537			}
  1538	
  1539			// Process one instruction.
  1540			inst := uintptr(*p)
  1541			p = add1(p)
  1542			n := inst & 0x7F
  1543			if inst&0x80 == 0 {
  1544				// Literal bits; n == 0 means end of program.
  1545				if n == 0 {
  1546					// Program is over; continue in trailer if present.
  1547					if trailer != nil {
  1548						//println("trailer")
  1549						p = trailer
  1550						trailer = nil
  1551						continue
  1552					}
  1553					//println("done")
  1554					break Run
  1555				}
  1556				//println("lit", n, dst)
  1557				nbyte := n / 8
  1558				for i := uintptr(0); i < nbyte; i++ {
  1559					bits |= uintptr(*p) << nbits
  1560					p = add1(p)
  1561					if size == 1 {
  1562						*dst = uint8(bits)
  1563						dst = add1(dst)
  1564						bits >>= 8
  1565					} else {
  1566						v := bits&0xf | bitMarkedAll
  1567						*dst = uint8(v)
  1568						dst = subtract1(dst)
  1569						bits >>= 4
  1570						v = bits&0xf | bitMarkedAll
  1571						*dst = uint8(v)
  1572						dst = subtract1(dst)
  1573						bits >>= 4
  1574					}
  1575				}
  1576				if n %= 8; n > 0 {
  1577					bits |= uintptr(*p) << nbits
  1578					p = add1(p)
  1579					nbits += n
  1580				}
  1581				continue Run
  1582			}
  1583	
  1584			// Repeat. If n == 0, it is encoded in a varint in the next bytes.
  1585			if n == 0 {
  1586				for off := uint(0); ; off += 7 {
  1587					x := uintptr(*p)
  1588					p = add1(p)
  1589					n |= (x & 0x7F) << off
  1590					if x&0x80 == 0 {
  1591						break
  1592					}
  1593				}
  1594			}
  1595	
  1596			// Count is encoded in a varint in the next bytes.
  1597			c := uintptr(0)
  1598			for off := uint(0); ; off += 7 {
  1599				x := uintptr(*p)
  1600				p = add1(p)
  1601				c |= (x & 0x7F) << off
  1602				if x&0x80 == 0 {
  1603					break
  1604				}
  1605			}
  1606			c *= n // now total number of bits to copy
  1607	
  1608			// If the number of bits being repeated is small, load them
  1609			// into a register and use that register for the entire loop
  1610			// instead of repeatedly reading from memory.
  1611			// Handling fewer than 8 bits here makes the general loop simpler.
  1612			// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
  1613			// the pattern to a bit buffer holding at most 7 bits (a partial byte)
  1614			// it will not overflow.
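		// (With 8-byte pointers, for example, maxBits is 57: 57 pattern
		// bits added to at most 7 buffered bits exactly fill a 64-bit
		// register.)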
  1615			src := dst
  1616			const maxBits = sys.PtrSize*8 - 7
  1617			if n <= maxBits {
  1618				// Start with bits in output buffer.
  1619				pattern := bits
  1620				npattern := nbits
  1621	
  1622				// If we need more bits, fetch them from memory.
  1623				if size == 1 {
  1624					src = subtract1(src)
  1625					for npattern < n {
  1626						pattern <<= 8
  1627						pattern |= uintptr(*src)
  1628						src = subtract1(src)
  1629						npattern += 8
  1630					}
  1631				} else {
  1632					src = add1(src)
  1633					for npattern < n {
  1634						pattern <<= 4
  1635						pattern |= uintptr(*src) & 0xf
  1636						src = add1(src)
  1637						npattern += 4
  1638					}
  1639				}
  1640	
  1641				// We started with the whole bit output buffer,
  1642				// and then we loaded bits from whole bytes.
  1643				// Either way, we might now have too many instead of too few.
  1644				// Discard the extra.
  1645				if npattern > n {
  1646					pattern >>= npattern - n
  1647					npattern = n
  1648				}
  1649	
  1650				// Replicate pattern to at most maxBits.
  1651				if npattern == 1 {
  1652					// One bit being repeated.
  1653					// If the bit is 1, make the pattern all 1s.
  1654					// If the bit is 0, the pattern is already all 0s,
  1655					// but we can claim that the number of bits
  1656					// in the word is equal to the number we need (c),
  1657					// because right shift of bits will zero fill.
  1658					if pattern == 1 {
  1659						pattern = 1<<maxBits - 1
  1660						npattern = maxBits
  1661					} else {
  1662						npattern = c
  1663					}
  1664				} else {
  1665					b := pattern
  1666					nb := npattern
  1667					if nb+nb <= maxBits {
  1668						// Double pattern until the whole uintptr is filled.
  1669						for nb <= sys.PtrSize*8 {
  1670							b |= b << nb
  1671							nb += nb
  1672						}
  1673						// Trim away incomplete copy of original pattern in high bits.
  1674						// TODO(rsc): Replace with table lookup or loop on systems without divide?
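					// For example, on a 64-bit system a 3-bit pattern
					// doubles through nb = 6, 12, 24, 48, 96 (bits past
					// the word are shifted away), and the trim keeps
					// 57/3*3 = 57 bits: exactly 19 whole copies.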
  1675						nb = maxBits / npattern * npattern
  1676						b &= 1<<nb - 1
  1677						pattern = b
  1678						npattern = nb
  1679					}
  1680				}
  1681	
  1682				// Add pattern to bit buffer and flush bit buffer, c/npattern times.
  1683				// Since pattern contains >8 bits, there will be full bytes to flush
  1684				// on each iteration.
  1685				for ; c >= npattern; c -= npattern {
  1686					bits |= pattern << nbits
  1687					nbits += npattern
  1688					if size == 1 {
  1689						for nbits >= 8 {
  1690							*dst = uint8(bits)
  1691							dst = add1(dst)
  1692							bits >>= 8
  1693							nbits -= 8
  1694						}
  1695					} else {
  1696						for nbits >= 4 {
  1697							*dst = uint8(bits&0xf | bitMarkedAll)
  1698							dst = subtract1(dst)
  1699							bits >>= 4
  1700							nbits -= 4
  1701						}
  1702					}
  1703				}
  1704	
  1705				// Add final fragment to bit buffer.
  1706				if c > 0 {
  1707					pattern &= 1<<c - 1
  1708					bits |= pattern << nbits
  1709					nbits += c
  1710				}
  1711				continue Run
  1712			}
  1713	
  1714			// Repeat; n too large to fit in a register.
  1715			// Since nbits <= 7, we know the first few bytes of repeated data
  1716			// are already written to memory.
  1717			off := n - nbits // n > nbits because n > maxBits and nbits <= 7
  1718			if size == 1 {
  1719				// Leading src fragment.
  1720				src = subtractb(src, (off+7)/8)
  1721				if frag := off & 7; frag != 0 {
  1722					bits |= uintptr(*src) >> (8 - frag) << nbits
  1723					src = add1(src)
  1724					nbits += frag
  1725					c -= frag
  1726				}
  1727				// Main loop: load one byte, write another.
  1728				// The bits are rotating through the bit buffer.
  1729				for i := c / 8; i > 0; i-- {
  1730					bits |= uintptr(*src) << nbits
  1731					src = add1(src)
  1732					*dst = uint8(bits)
  1733					dst = add1(dst)
  1734					bits >>= 8
  1735				}
  1736				// Final src fragment.
  1737				if c %= 8; c > 0 {
  1738					bits |= (uintptr(*src) & (1<<c - 1)) << nbits
  1739					nbits += c
  1740				}
  1741			} else {
  1742				// Leading src fragment.
  1743				src = addb(src, (off+3)/4)
  1744				if frag := off & 3; frag != 0 {
  1745					bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
  1746					src = subtract1(src)
  1747					nbits += frag
  1748					c -= frag
  1749				}
  1750				// Main loop: load one byte, write another.
  1751				// The bits are rotating through the bit buffer.
  1752				for i := c / 4; i > 0; i-- {
  1753					bits |= (uintptr(*src) & 0xf) << nbits
  1754					src = subtract1(src)
  1755					*dst = uint8(bits&0xf | bitMarkedAll)
  1756					dst = subtract1(dst)
  1757					bits >>= 4
  1758				}
  1759				// Final src fragment.
  1760				if c %= 4; c > 0 {
  1761					bits |= (uintptr(*src) & (1<<c - 1)) << nbits
  1762					nbits += c
  1763				}
  1764			}
  1765		}
  1766	
  1767		// Write any final bits out, using full-byte writes, even for the final byte.
  1768		var totalBits uintptr
  1769		if size == 1 {
  1770			totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
  1771			nbits += -nbits & 7
  1772			for ; nbits > 0; nbits -= 8 {
  1773				*dst = uint8(bits)
  1774				dst = add1(dst)
  1775				bits >>= 8
  1776			}
  1777		} else {
  1778			totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
  1779			nbits += -nbits & 3
  1780			for ; nbits > 0; nbits -= 4 {
  1781				v := bits&0xf | bitMarkedAll
  1782				*dst = uint8(v)
  1783				dst = subtract1(dst)
  1784				bits >>= 4
  1785			}
  1786		}
  1787		return totalBits
  1788	}
  1789	
  1790	func dumpGCProg(p *byte) {
  1791		nptr := 0
  1792		for {
  1793			x := *p
  1794			p = add1(p)
  1795			if x == 0 {
  1796				print("\t", nptr, " end\n")
  1797				break
  1798			}
  1799			if x&0x80 == 0 {
  1800				print("\t", nptr, " lit ", x, ":")
  1801				n := int(x+7) / 8
  1802				for i := 0; i < n; i++ {
  1803					print(" ", hex(*p))
  1804					p = add1(p)
  1805				}
  1806				print("\n")
  1807				nptr += int(x)
  1808			} else {
  1809				nbit := int(x &^ 0x80)
  1810				if nbit == 0 {
  1811					for nb := uint(0); ; nb += 7 {
  1812						x := *p
  1813						p = add1(p)
  1814						nbit |= int(x&0x7f) << nb
  1815						if x&0x80 == 0 {
  1816							break
  1817						}
  1818					}
  1819				}
  1820				count := 0
  1821				for nb := uint(0); ; nb += 7 {
  1822					x := *p
  1823					p = add1(p)
  1824					count |= int(x&0x7f) << nb
  1825					if x&0x80 == 0 {
  1826						break
  1827					}
  1828				}
  1829				print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
  1830				nptr += nbit * count
  1831			}
  1832		}
  1833	}
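
// For the hypothetical program bytes 0x02 0x01 0x82 0x03 0x00 used as an
// example above, dumpGCProg would print something like:
//
//	0 lit 2: 0x1
//	2 repeat 2 × 3
//	8 end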
  1834	
  1835	// Testing.
  1836	
  1837	func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
  1838		target := (*stkframe)(ctxt)
  1839		if frame.sp <= target.sp && target.sp < frame.varp {
  1840			*target = *frame
  1841			return false
  1842		}
  1843		return true
  1844	}
  1845	
  1846	// gcbits returns the GC type info for x, for testing.
   1847	// The result is the bitmap entries (0 or 1), one byte per pointer-sized word of x.
  1848	//go:linkname reflect_gcbits reflect.gcbits
  1849	func reflect_gcbits(x interface{}) []byte {
  1850		ret := getgcmask(x)
  1851		typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
  1852		nptr := typ.ptrdata / sys.PtrSize
  1853		for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
  1854			ret = ret[:len(ret)-1]
  1855		}
  1856		return ret
  1857	}
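
// As a worked illustration (hypothetical type, not from the runtime tests):
//
//	type T struct {
//		p *int    // word 0: pointer
//		n uintptr // word 1: scalar
//		q *byte   // word 2: pointer
//	}
//
// reflect_gcbits of a *T value reports []byte{1, 0, 1}: typ.ptrdata covers
// all three words, so no trailing zero entries are trimmed.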
  1858	
   1859	// getgcmask returns the GC type info for the object ep, for testing.
  1860	func getgcmask(ep interface{}) (mask []byte) {
  1861		e := *efaceOf(&ep)
  1862		p := e.data
  1863		t := e._type
  1864		// data or bss
  1865		for datap := &firstmoduledata; datap != nil; datap = datap.next {
  1866			// data
  1867			if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
  1868				bitmap := datap.gcdatamask.bytedata
  1869				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
  1870				mask = make([]byte, n/sys.PtrSize)
  1871				for i := uintptr(0); i < n; i += sys.PtrSize {
  1872					off := (uintptr(p) + i - datap.data) / sys.PtrSize
  1873					mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
  1874				}
  1875				return
  1876			}
  1877	
  1878			// bss
  1879			if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
  1880				bitmap := datap.gcbssmask.bytedata
  1881				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
  1882				mask = make([]byte, n/sys.PtrSize)
  1883				for i := uintptr(0); i < n; i += sys.PtrSize {
  1884					off := (uintptr(p) + i - datap.bss) / sys.PtrSize
  1885					mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
  1886				}
  1887				return
  1888			}
  1889		}
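
	// The data and bss paths above index a 1-bit bitmap the same way;
	// as a sketch (bitmapBit is a hypothetical helper, not runtime code):
	//
	//	func bitmapBit(bitmap *byte, off uintptr) byte {
	//		return *addb(bitmap, off/8) >> (off % 8) & 1
	//	}
	//
	// Entry off lives in byte off/8; bits within each byte are consumed
	// starting at the low bit.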
  1890	
  1891		// heap
  1892		var n uintptr
  1893		var base uintptr
  1894		if mlookup(uintptr(p), &base, &n, nil) != 0 {
  1895			mask = make([]byte, n/sys.PtrSize)
  1896			for i := uintptr(0); i < n; i += sys.PtrSize {
  1897				hbits := heapBitsForAddr(base + i)
  1898				if hbits.isPointer() {
  1899					mask[i/sys.PtrSize] = 1
  1900				}
  1901				if i != 1*sys.PtrSize && !hbits.morePointers() {
  1902					mask = mask[:i/sys.PtrSize]
  1903					break
  1904				}
  1905			}
  1906			return
  1907		}
  1908	
  1909		// stack
  1910		if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
  1911			var frame stkframe
  1912			frame.sp = uintptr(p)
  1914			gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
  1915			if frame.fn != nil {
  1916				f := frame.fn
  1917				targetpc := frame.continpc
  1918				if targetpc == 0 {
  1919					return
  1920				}
  1921				if targetpc != f.entry {
  1922					targetpc--
  1923				}
  1924				pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
  1925				if pcdata == -1 {
  1926					return
  1927				}
  1928				stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
  1929				if stkmap == nil || stkmap.n <= 0 {
  1930					return
  1931				}
  1932				bv := stackmapdata(stkmap, pcdata)
  1933				size := uintptr(bv.n) * sys.PtrSize
  1934				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
  1935				mask = make([]byte, n/sys.PtrSize)
  1936				for i := uintptr(0); i < n; i += sys.PtrSize {
  1937					bitmap := bv.bytedata
  1938					off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
  1939					mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
  1940				}
  1941			}
  1942			return
  1943		}
  1944	
   1945		// Otherwise, not something the GC knows about.
   1946		// Possibly read-only data, like malloc(0).
   1947		// Must not have pointers.
  1948		return
  1949	}
  1950	
