
Source file src/runtime/mgcmark.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Garbage collector: marking and scanning
     6	
     7	package runtime
     8	
     9	import (
    10		"runtime/internal/atomic"
    11		"runtime/internal/sys"
    12		"unsafe"
    13	)
    14	
    15	const (
    16		fixedRootFinalizers = iota
    17		fixedRootFreeGStacks
    18		fixedRootCount
    19	
    20		// rootBlockBytes is the number of bytes to scan per data or
    21		// BSS root.
    22		rootBlockBytes = 256 << 10
    23	
    24		// rootBlockSpans is the number of spans to scan per span
    25		// root.
    26		rootBlockSpans = 8 * 1024 // 64MB worth of spans
    27	
    28		// maxObletBytes is the maximum bytes of an object to scan at
    29		// once. Larger objects will be split up into "oblets" of at
    30		// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
    31		// scan preemption at ~100 µs.
    32		//
    33		// This must be > _MaxSmallSize so that the object base is the
    34		// span base.
    35		maxObletBytes = 128 << 10
    36	
    37		// idleCheckThreshold specifies how many units of work to do
    38		// between run queue checks in an idle worker. Assuming a scan
    39		// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
    40		// overhead in the scan loop (the scheduler check may perform
    41		// a syscall, so its overhead is nontrivial). Higher values
    42		// make the system less responsive to incoming work.
    43		idleCheckThreshold = 100000
    44	)
    45	
    46	// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
    47	// some miscellany) and initializes scanning-related state.
    48	//
    49	// The caller must have called gcCopySpans().
    50	//
    51	// The world must be stopped.
    52	//
    53	//go:nowritebarrier
    54	func gcMarkRootPrepare() {
    55		if gcphase == _GCmarktermination {
    56			work.nFlushCacheRoots = int(gomaxprocs)
    57		} else {
    58			work.nFlushCacheRoots = 0
    59		}
    60	
    61		// Compute how many data and BSS root blocks there are.
    62		nBlocks := func(bytes uintptr) int {
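      		// Round up so a trailing partial block still
      		// gets its own root scanning job.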
    63			return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
    64		}
    65	
    66		work.nDataRoots = 0
    67		work.nBSSRoots = 0
    68	
    69		// Only scan globals once per cycle; preferably concurrently.
    70		if !work.markrootDone {
    71			for _, datap := range activeModules() {
    72				nDataRoots := nBlocks(datap.edata - datap.data)
    73				if nDataRoots > work.nDataRoots {
    74					work.nDataRoots = nDataRoots
    75				}
    76			}
    77	
    78			for _, datap := range activeModules() {
    79				nBSSRoots := nBlocks(datap.ebss - datap.bss)
    80				if nBSSRoots > work.nBSSRoots {
    81					work.nBSSRoots = nBSSRoots
    82				}
    83			}
    84		}
    85	
    86		if !work.markrootDone {
    87			// On the first markroot, we need to scan span roots.
    88			// In concurrent GC, this happens during concurrent
    89		// mark and we depend on addfinalizer to ensure the
    90		// invariants described in markrootSpans for objects
    91		// that get finalizers after concurrent mark. In STW
    92		// GC, this will happen during mark termination.
    93			//
    94			// We're only interested in scanning the in-use spans,
    95			// which will all be swept at this point. More spans
    96			// may be added to this list during concurrent GC, but
    97			// we only care about spans that were allocated before
    98			// this mark phase.
    99			work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
   100	
   101			// On the first markroot, we need to scan all Gs. Gs
   102			// may be created after this point, but it's okay that
   103			// we ignore them because they begin life without any
   104			// roots, so there's nothing to scan, and any roots
   105			// they create during the concurrent phase will be
   106			// scanned during mark termination. During mark
   107			// termination, allglen isn't changing, so we'll scan
   108			// all Gs.
   109			work.nStackRoots = int(atomic.Loaduintptr(&allglen))
   110			work.nRescanRoots = 0
   111		} else {
   112			// We've already scanned span roots and kept the scan
   113			// up-to-date during concurrent mark.
   114			work.nSpanRoots = 0
   115	
   116			// On the second pass of markroot, we're just scanning
   117			// dirty stacks. It's safe to access rescan since the
   118			// world is stopped.
   119			work.nStackRoots = 0
   120			work.nRescanRoots = len(work.rescan.list)
   121		}
   122	
   123		work.markrootNext = 0
   124		work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots)
   125	}
   126	
   127	// gcMarkRootCheck checks that all roots have been scanned. It is
   128	// purely for debugging.
   129	func gcMarkRootCheck() {
   130		if work.markrootNext < work.markrootJobs {
   131			print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
   132			throw("left over markroot jobs")
   133		}
   134	
   135		lock(&allglock)
   136		// Check that stacks have been scanned.
   137		var gp *g
   138		if gcphase == _GCmarktermination && debug.gcrescanstacks > 0 {
   139			for i := 0; i < len(allgs); i++ {
   140				gp = allgs[i]
   141				if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead {
   142					goto fail
   143				}
   144			}
   145		} else {
   146			for i := 0; i < work.nStackRoots; i++ {
   147				gp = allgs[i]
   148				if !gp.gcscandone {
   149					goto fail
   150				}
   151			}
   152		}
   153		unlock(&allglock)
   154		return
   155	
   156	fail:
   157		println("gp", gp, "goid", gp.goid,
   158			"status", readgstatus(gp),
   159			"gcscandone", gp.gcscandone,
   160			"gcscanvalid", gp.gcscanvalid)
   161		unlock(&allglock) // Avoid self-deadlock with traceback.
   162		throw("scan missed a g")
   163	}
   164	
   165	// ptrmask for an allocation containing a single pointer.
   166	var oneptrmask = [...]uint8{1}
   167	
   168	// markroot scans the i'th root.
   169	//
   170	// Preemption must be disabled (because this uses a gcWork).
   171	//
   172	// nowritebarrier is only advisory here.
   173	//
   174	//go:nowritebarrier
   175	func markroot(gcw *gcWork, i uint32) {
   176		// TODO(austin): This is a bit ridiculous. Compute and store
   177		// the bases in gcMarkRootPrepare instead of the counts.
   178		baseFlushCache := uint32(fixedRootCount)
   179		baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
   180		baseBSS := baseData + uint32(work.nDataRoots)
   181		baseSpans := baseBSS + uint32(work.nBSSRoots)
   182		baseStacks := baseSpans + uint32(work.nSpanRoots)
   183		baseRescan := baseStacks + uint32(work.nStackRoots)
   184		end := baseRescan + uint32(work.nRescanRoots)
   185	
   186		// Note: if you add a case here, please also update heapdump.go:dumproots.
   187		switch {
   188		case baseFlushCache <= i && i < baseData:
   189			flushmcache(int(i - baseFlushCache))
   190	
   191		case baseData <= i && i < baseBSS:
   192			for _, datap := range activeModules() {
   193				markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-baseData))
   194			}
   195	
   196		case baseBSS <= i && i < baseSpans:
   197			for _, datap := range activeModules() {
   198				markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-baseBSS))
   199			}
   200	
   201		case i == fixedRootFinalizers:
   202			for fb := allfin; fb != nil; fb = fb.alllink {
   203				cnt := uintptr(atomic.Load(&fb.cnt))
   204				scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
   205			}
   206	
   207		case i == fixedRootFreeGStacks:
   208			// Only do this once per GC cycle; preferably
   209			// concurrently.
   210			if !work.markrootDone {
   211				// Switch to the system stack so we can call
   212				// stackfree.
   213				systemstack(markrootFreeGStacks)
   214			}
   215	
   216		case baseSpans <= i && i < baseStacks:
   217			// mark MSpan.specials
   218			markrootSpans(gcw, int(i-baseSpans))
   219	
   220		default:
   221			// the rest is scanning goroutine stacks
   222			var gp *g
   223			if baseStacks <= i && i < baseRescan {
   224				gp = allgs[i-baseStacks]
   225			} else if baseRescan <= i && i < end {
   226				gp = work.rescan.list[i-baseRescan].ptr()
   227				if gp.gcRescan != int32(i-baseRescan) {
   228					// Looking for issue #17099.
   229					println("runtime: gp", gp, "found at rescan index", i-baseRescan, "but should be at", gp.gcRescan)
   230					throw("bad g rescan index")
   231				}
   232			} else {
   233				throw("markroot: bad index")
   234			}
   235	
   236			// Remember when we first observed the G blocked;
   237			// this is needed only for traceback output.
   238			status := readgstatus(gp) // We are not in a scan state
   239			if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
   240				gp.waitsince = work.tstart
   241			}
   242	
   243			// scang must be done on the system stack in case
   244			// we're trying to scan our own stack.
   245			systemstack(func() {
   246				// If this is a self-scan, put the user G in
   247				// _Gwaiting to prevent self-deadlock. It may
   248				// already be in _Gwaiting if this is a mark
   249				// worker or we're in mark termination.
   250				userG := getg().m.curg
   251				selfScan := gp == userG && readgstatus(userG) == _Grunning
   252				if selfScan {
   253					casgstatus(userG, _Grunning, _Gwaiting)
   254					userG.waitreason = "garbage collection scan"
   255				}
   256	
   257				// TODO: scang blocks until gp's stack has
   258				// been scanned, which may take a while for
   259				// running goroutines. Consider doing this in
   260				// two phases where the first is non-blocking:
   261				// we scan the stacks we can and ask running
   262				// goroutines to scan themselves; and the
   263				// second blocks.
   264				scang(gp, gcw)
   265	
   266				if selfScan {
   267					casgstatus(userG, _Gwaiting, _Grunning)
   268				}
   269			})
   270		}
   271	}
   272	
   273	// markrootBlock scans the shard'th shard of the block of memory [b0,
   274	// b0+n0), with the given pointer mask.
   275	//
   276	//go:nowritebarrier
   277	func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
   278		if rootBlockBytes%(8*sys.PtrSize) != 0 {
   279			// This is necessary to pick byte offsets in ptrmask0.
   280			throw("rootBlockBytes must be a multiple of 8*ptrSize")
   281		}
   282	
   283		b := b0 + uintptr(shard)*rootBlockBytes
   284		if b >= b0+n0 {
   285			return
   286		}
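      	// Each byte of the pointer mask covers 8 pointer-sized
      	// words, so each shard advances the mask by
      	// rootBlockBytes/(8*PtrSize) bytes.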
   287		ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*sys.PtrSize))))
   288		n := uintptr(rootBlockBytes)
   289		if b+n > b0+n0 {
   290			n = b0 + n0 - b
   291		}
   292	
   293		// Scan this shard.
   294		scanblock(b, n, ptrmask, gcw)
   295	}
   296	
   297	// markrootFreeGStacks frees stacks of dead Gs.
   298	//
   299	// This does not free stacks of dead Gs cached on Ps, but having a few
   300	// cached stacks around isn't a problem.
   301	//
   302	//TODO go:nowritebarrier
   303	func markrootFreeGStacks() {
   304		// Take list of dead Gs with stacks.
   305		lock(&sched.gflock)
   306		list := sched.gfreeStack
   307		sched.gfreeStack = nil
   308		unlock(&sched.gflock)
   309		if list == nil {
   310			return
   311		}
   312	
   313		// Free stacks.
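      	// For a dead G, shrinkstack frees the stack entirely
      	// rather than shrinking it.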
   314		tail := list
   315		for gp := list; gp != nil; gp = gp.schedlink.ptr() {
   316			shrinkstack(gp)
   317			tail = gp
   318		}
   319	
   320		// Put Gs back on the free list.
   321		lock(&sched.gflock)
   322		tail.schedlink.set(sched.gfreeNoStack)
   323		sched.gfreeNoStack = list
   324		unlock(&sched.gflock)
   325	}
   326	
   327	// markrootSpans marks roots for one shard of work.spans.
   328	//
   329	//go:nowritebarrier
   330	func markrootSpans(gcw *gcWork, shard int) {
   331		// Objects with finalizers have two GC-related invariants:
   332		//
   333		// 1) Everything reachable from the object must be marked.
   334		// This ensures that when we pass the object to its finalizer,
   335		// everything the finalizer can reach will be retained.
   336		//
   337		// 2) Finalizer specials (which are not in the garbage
   338		// collected heap) are roots. In practice, this means the fn
   339		// field must be scanned.
   340		//
   341		// TODO(austin): There are several ideas for making this more
   342		// efficient in issue #11485.
   343	
   344		if work.markrootDone {
   345			throw("markrootSpans during second markroot")
   346		}
   347	
   348		sg := mheap_.sweepgen
   349		spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
   350		// Note that work.spans may not include spans that were
   351		// allocated between entering the scan phase and now. This is
   352		// okay because any objects with finalizers in those spans
   353		// must have been allocated and given finalizers after we
   354		// entered the scan phase, so addfinalizer will have ensured
   355		// the above invariants for them.
   356		for _, s := range spans {
   357			if s.state != mSpanInUse {
   358				continue
   359			}
   360			if !useCheckmark && s.sweepgen != sg {
   361				// sweepgen was updated (+2) during non-checkmark GC pass
   362				print("sweep ", s.sweepgen, " ", sg, "\n")
   363				throw("gc: unswept span")
   364			}
   365	
   366			// Speculatively check if there are any specials
   367			// without acquiring the span lock. This may race with
   368			// adding the first special to a span, but in that
   369			// case addfinalizer will observe that the GC is
   370			// active (which is globally synchronized) and ensure
   371			// the above invariants. We may also ensure the
   372			// invariants, but it's okay to scan an object twice.
   373			if s.specials == nil {
   374				continue
   375			}
   376	
   377			// Lock the specials to prevent a special from being
   378			// removed from the list while we're traversing it.
   379			lock(&s.speciallock)
   380	
   381			for sp := s.specials; sp != nil; sp = sp.next {
   382				if sp.kind != _KindSpecialFinalizer {
   383					continue
   384				}
   385				// don't mark finalized object, but scan it so we
   386				// retain everything it points to.
   387				spf := (*specialfinalizer)(unsafe.Pointer(sp))
   388				// A finalizer can be set for an inner byte of an object, find object beginning.
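      			// Rounding the offset down to a multiple of
      			// elemsize gives the base of the object that
      			// contains the finalized byte.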
   389				p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
   390	
   391				// Mark everything that can be reached from
   392				// the object (but *not* the object itself or
   393				// we'll never collect it).
   394				scanobject(p, gcw)
   395	
   396				// The special itself is a root.
   397				scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
   398			}
   399	
   400			unlock(&s.speciallock)
   401		}
   402	}
   403	
   404	// gcAssistAlloc performs GC work to make gp's assist debt positive.
   405	// gp must be the calling user goroutine.
   406	//
   407	// This must be called with preemption enabled.
   408	func gcAssistAlloc(gp *g) {
   409		// Don't assist in non-preemptible contexts. These are
   410		// generally fragile and won't allow the assist to block.
   411		if getg() == gp.m.g0 {
   412			return
   413		}
   414		if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
   415			return
   416		}
   417	
   418	retry:
   419		// Compute the amount of scan work we need to do to make the
   420		// balance positive. When the required amount of work is low,
   421		// we over-assist to build up credit for future allocations
   422		// and amortize the cost of assisting.
   423		debtBytes := -gp.gcAssistBytes
   424		scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
   425		if scanWork < gcOverAssistWork {
   426			scanWork = gcOverAssistWork
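      		// Recompute debtBytes so the credit accounting
      		// below matches the inflated scanWork.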
   427			debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
   428		}
   429	
   430		// Steal as much credit as we can from the background GC's
   431		// scan credit. This is racy and may drop the background
   432		// credit below 0 if two mutators steal at the same time. This
   433		// will just cause steals to fail until credit is accumulated
   434		// again, so in the long run it doesn't really matter, but we
   435		// do have to handle the negative credit case.
   436		bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
   437		stolen := int64(0)
   438		if bgScanCredit > 0 {
   439			if bgScanCredit < scanWork {
   440				stolen = bgScanCredit
   441				gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
   442			} else {
   443				stolen = scanWork
   444				gp.gcAssistBytes += debtBytes
   445			}
   446			atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
   447	
   448			scanWork -= stolen
   449	
   450			if scanWork == 0 {
   451				// We were able to steal all of the credit we
   452				// needed.
   453				return
   454			}
   455		}
   456	
   457		// Perform assist work
   458		systemstack(func() {
   459			gcAssistAlloc1(gp, scanWork)
   460			// The user stack may have moved, so this can't touch
   461			// anything on it until it returns from systemstack.
   462		})
   463	
   464		completed := gp.param != nil
   465		gp.param = nil
   466		if completed {
   467			gcMarkDone()
   468		}
   469	
   470		if gp.gcAssistBytes < 0 {
   471		// We were unable to steal enough credit or perform
   472			// enough work to pay off the assist debt. We need to
   473			// do one of these before letting the mutator allocate
   474			// more to prevent over-allocation.
   475			//
   476			// If this is because we were preempted, reschedule
   477			// and try some more.
   478			if gp.preempt {
   479				Gosched()
   480				goto retry
   481			}
   482	
   483			// Add this G to an assist queue and park. When the GC
   484			// has more background credit, it will satisfy queued
   485			// assists before flushing to the global credit pool.
   486			//
   487			// Note that this does *not* get woken up when more
   488			// work is added to the work list. The theory is that
   489			// there wasn't enough work to do anyway, so we might
   490			// as well let background marking take care of the
   491			// work that is available.
   492			if !gcParkAssist() {
   493				goto retry
   494			}
   495	
   496			// At this point either background GC has satisfied
   497			// this G's assist debt, or the GC cycle is over.
   498		}
   499	}
   500	
   501	// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
   502	// stack. This is a separate function to make it easier to see that
   503	// we're not capturing anything from the user stack, since the user
   504	// stack may move while we're in this function.
   505	//
   506	// gcAssistAlloc1 indicates whether this assist completed the mark
   507	// phase by setting gp.param to non-nil. This can't be communicated on
   508	// the stack since it may move.
   509	//
   510	//go:systemstack
   511	func gcAssistAlloc1(gp *g, scanWork int64) {
   512		// Clear the flag indicating that this assist completed the
   513		// mark phase.
   514		gp.param = nil
   515	
   516		if atomic.Load(&gcBlackenEnabled) == 0 {
   517		// The gcBlackenEnabled check in malloc races with the
   518		// store that clears it, but an atomic check in every malloc
   519		// would be a performance hit.
   520		// Instead we recheck it here on the non-preemptible system
   521		// stack to determine if we should perform an assist.
   522	
   523			// GC is done, so ignore any remaining debt.
   524			gp.gcAssistBytes = 0
   525			return
   526		}
   527		// Track time spent in this assist. Since we're on the
   528		// system stack, this is non-preemptible, so we can
   529		// just measure start and end time.
   530		startTime := nanotime()
   531	
   532		decnwait := atomic.Xadd(&work.nwait, -1)
   533		if decnwait == work.nproc {
   534			println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
   535			throw("nwait > work.nprocs")
   536		}
   537	
   538		// gcDrainN requires the caller to be preemptible.
   539		casgstatus(gp, _Grunning, _Gwaiting)
   540		gp.waitreason = "GC assist marking"
   541	
   542		// drain own cached work first in the hopes that it
   543		// will be more cache friendly.
   544		gcw := &getg().m.p.ptr().gcw
   545		workDone := gcDrainN(gcw, scanWork)
   546		// If we are near the end of the mark phase
   547		// dispose of the gcw.
   548		if gcBlackenPromptly {
   549			gcw.dispose()
   550		}
   551	
   552		casgstatus(gp, _Gwaiting, _Grunning)
   553	
   554		// Record that we did this much scan work.
   555		//
   556		// Back out the number of bytes of assist credit that
   557		// this scan work counts for. The "1+" is a poor man's
   558		// round-up, to ensure this adds credit even if
   559		// assistBytesPerWork is very low.
   560		gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))
   561	
   562		// If this is the last worker and we ran out of work,
   563		// signal a completion point.
   564		incnwait := atomic.Xadd(&work.nwait, +1)
   565		if incnwait > work.nproc {
   566			println("runtime: work.nwait=", incnwait,
   567				"work.nproc=", work.nproc,
   568				"gcBlackenPromptly=", gcBlackenPromptly)
   569			throw("work.nwait > work.nproc")
   570		}
   571	
   572		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
   573			// This has reached a background completion point. Set
   574			// gp.param to a non-nil value to indicate this. It
   575			// doesn't matter what we set it to (it just has to be
   576			// a valid pointer).
   577			gp.param = unsafe.Pointer(gp)
   578		}
   579		duration := nanotime() - startTime
   580		_p_ := gp.m.p.ptr()
   581		_p_.gcAssistTime += duration
   582		if _p_.gcAssistTime > gcAssistTimeSlack {
   583			atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
   584			_p_.gcAssistTime = 0
   585		}
   586	}
   587	
   588	// gcWakeAllAssists wakes all currently blocked assists. This is used
   589	// at the end of a GC cycle. gcBlackenEnabled must be false to prevent
   590	// new assists from going to sleep after this point.
   591	func gcWakeAllAssists() {
   592		lock(&work.assistQueue.lock)
   593		injectglist(work.assistQueue.head.ptr())
   594		work.assistQueue.head.set(nil)
   595		work.assistQueue.tail.set(nil)
   596		unlock(&work.assistQueue.lock)
   597	}
   598	
   599	// gcParkAssist puts the current goroutine on the assist queue and parks.
   600	//
   601	// gcParkAssist returns whether the assist is now satisfied. If it
   602	// returns false, the caller must retry the assist.
   603	//
   604	//go:nowritebarrier
   605	func gcParkAssist() bool {
   606		lock(&work.assistQueue.lock)
   607		// If the GC cycle finished while we were getting the lock,
   608		// exit the assist. The cycle can't finish while we hold the
   609		// lock.
   610		if atomic.Load(&gcBlackenEnabled) == 0 {
   611			unlock(&work.assistQueue.lock)
   612			return true
   613		}
   614	
   615		gp := getg()
   616		oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail
   617		if oldHead == 0 {
   618			work.assistQueue.head.set(gp)
   619		} else {
   620			oldTail.ptr().schedlink.set(gp)
   621		}
   622		work.assistQueue.tail.set(gp)
   623		gp.schedlink.set(nil)
   624	
   625		// Recheck for background credit now that this G is in
   626		// the queue, but can still back out. This avoids a
   627		// race in case background marking has flushed more
   628		// credit since we checked above.
   629		if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
   630			work.assistQueue.head = oldHead
   631			work.assistQueue.tail = oldTail
   632			if oldTail != 0 {
   633				oldTail.ptr().schedlink.set(nil)
   634			}
   635			unlock(&work.assistQueue.lock)
   636			return false
   637		}
   638		// Park.
   639		goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2)
   640		return true
   641	}
   642	
   643	// gcFlushBgCredit flushes scanWork units of background scan work
   644	// credit. This first satisfies blocked assists on the
   645	// work.assistQueue and then flushes any remaining credit to
   646	// gcController.bgScanCredit.
   647	//
   648	// Write barriers are disallowed because this is used by gcDrain after
   649	// it has ensured that all work is drained and this must preserve that
   650	// condition.
   651	//
   652	//go:nowritebarrierrec
   653	func gcFlushBgCredit(scanWork int64) {
   654		if work.assistQueue.head == 0 {
   655			// Fast path; there are no blocked assists. There's a
   656			// small window here where an assist may add itself to
   657			// the blocked queue and park. If that happens, we'll
   658			// just get it on the next flush.
   659			atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
   660			return
   661		}
   662	
   663		scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)
   664	
   665		lock(&work.assistQueue.lock)
   666		gp := work.assistQueue.head.ptr()
   667		for gp != nil && scanBytes > 0 {
   668			// Note that gp.gcAssistBytes is negative because gp
   669			// is in debt. Think carefully about the signs below.
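      		// For example, with scanBytes = 100 and
      		// gp.gcAssistBytes = -40, the sum is 60: the debt
      		// is fully paid and 60 bytes of credit remain for
      		// later assists.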
   670			if scanBytes+gp.gcAssistBytes >= 0 {
   671				// Satisfy this entire assist debt.
   672				scanBytes += gp.gcAssistBytes
   673				gp.gcAssistBytes = 0
   674				xgp := gp
   675				gp = gp.schedlink.ptr()
   676				// It's important that we *not* put xgp in
   677				// runnext. Otherwise, it's possible for user
   678				// code to exploit the GC worker's high
   679				// scheduler priority to get itself always run
   680				// before other goroutines and always in the
   681				// fresh quantum started by GC.
   682				ready(xgp, 0, false)
   683			} else {
   684				// Partially satisfy this assist.
   685				gp.gcAssistBytes += scanBytes
   686				scanBytes = 0
   687				// As a heuristic, we move this assist to the
   688				// back of the queue so that large assists
   689				// can't clog up the assist queue and
   690				// substantially delay small assists.
   691				xgp := gp
   692				gp = gp.schedlink.ptr()
   693				if gp == nil {
   694					// gp is the only assist in the queue.
   695					gp = xgp
   696				} else {
   697					xgp.schedlink = 0
   698					work.assistQueue.tail.ptr().schedlink.set(xgp)
   699					work.assistQueue.tail.set(xgp)
   700				}
   701				break
   702			}
   703		}
   704		work.assistQueue.head.set(gp)
   705		if gp == nil {
   706			work.assistQueue.tail.set(nil)
   707		}
   708	
   709		if scanBytes > 0 {
   710			// Convert from scan bytes back to work.
   711			scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
   712			atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
   713		}
   714		unlock(&work.assistQueue.lock)
   715	}
   716	
   717	// scanstack scans gp's stack, greying all pointers found on the stack.
   718	//
   719	// During mark phase, it also installs stack barriers while traversing
   720	// gp's stack. During mark termination, it stops scanning when it
   721	// reaches an unhit stack barrier.
   722	//
   723	// scanstack is marked go:systemstack because it must not be preempted
   724	// while using a workbuf.
   725	//
   726	//go:nowritebarrier
   727	//go:systemstack
   728	func scanstack(gp *g, gcw *gcWork) {
   729		if gp.gcscanvalid {
   730			return
   731		}
   732	
   733		if readgstatus(gp)&_Gscan == 0 {
   734			print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
   735			throw("scanstack - bad status")
   736		}
   737	
   738		switch readgstatus(gp) &^ _Gscan {
   739		default:
   740			print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
   741			throw("mark - bad status")
   742		case _Gdead:
   743			return
   744		case _Grunning:
   745			print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
   746			throw("scanstack: goroutine not stopped")
   747		case _Grunnable, _Gsyscall, _Gwaiting:
   748			// ok
   749		}
   750	
   751		if gp == getg() {
   752			throw("can't scan our own stack")
   753		}
   754		mp := gp.m
   755		if mp != nil && mp.helpgc != 0 {
   756			throw("can't scan gchelper stack")
   757		}
   758	
   759		// Shrink the stack if not much of it is being used. During
   760		// concurrent GC, we can do this during concurrent mark.
   761		if !work.markrootDone {
   762			shrinkstack(gp)
   763		}
   764	
   765		// Prepare for stack barrier insertion/removal.
   766		var sp, barrierOffset, nextBarrier uintptr
   767		if gp.syscallsp != 0 {
   768			sp = gp.syscallsp
   769		} else {
   770			sp = gp.sched.sp
   771		}
   772		gcLockStackBarriers(gp) // Not necessary during mark term, but harmless.
   773		switch gcphase {
   774		case _GCmark:
   775			// Install stack barriers during stack scan.
   776			barrierOffset = uintptr(firstStackBarrierOffset)
   777			nextBarrier = sp + barrierOffset
   778	
   779			if debug.gcstackbarrieroff > 0 {
   780				nextBarrier = ^uintptr(0)
   781			}
   782	
   783			// Remove any existing stack barriers before we
   784			// install new ones.
   785			gcRemoveStackBarriers(gp)
   786	
   787		case _GCmarktermination:
   788			if !work.markrootDone {
   789				// This is a STW GC. There may be stale stack
   790				// barriers from an earlier cycle since we
   791				// never passed through mark phase.
   792				gcRemoveStackBarriers(gp)
   793			}
   794	
   795			if int(gp.stkbarPos) == len(gp.stkbar) {
   796				// gp hit all of the stack barriers (or there
   797				// were none). Re-scan the whole stack.
   798				nextBarrier = ^uintptr(0)
   799			} else {
   800				// Only re-scan up to the lowest un-hit
   801				// barrier. Any frames above this have not
   802				// executed since the concurrent scan of gp and
   803				// any writes through up-pointers to above
   804				// this barrier had write barriers.
   805				nextBarrier = gp.stkbar[gp.stkbarPos].savedLRPtr
   806				if debugStackBarrier {
   807					print("rescan below ", hex(nextBarrier), " in [", hex(sp), ",", hex(gp.stack.hi), ") goid=", gp.goid, "\n")
   808				}
   809			}
   810	
   811		default:
   812			throw("scanstack in wrong phase")
   813		}
   814	
   815		// Scan the stack.
   816		var cache pcvalueCache
   817		n := 0
   818		scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
   819			scanframeworker(frame, &cache, gcw)
   820	
   821			if frame.fp > nextBarrier {
   822				// We skip installing a barrier on bottom-most
   823				// frame because on LR machines this LR is not
   824				// on the stack.
   825				if gcphase == _GCmark && n != 0 {
   826					if gcInstallStackBarrier(gp, frame) {
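      					// Double the offset so barriers are
      					// spaced exponentially up the stack,
      					// keeping their total number small.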
   827						barrierOffset *= 2
   828						nextBarrier = sp + barrierOffset
   829					}
   830				} else if gcphase == _GCmarktermination {
   831					// We just scanned a frame containing
   832					// a return to a stack barrier. Since
   833					// this frame never returned, we can
   834					// stop scanning.
   835					return false
   836				}
   837			}
   838			n++
   839	
   840			return true
   841		}
   842		gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
   843		tracebackdefers(gp, scanframe, nil)
   844		gcUnlockStackBarriers(gp)
   845		if gcphase == _GCmark {
   846			// gp may have added itself to the rescan list between
   847			// when GC started and now. It's clean now, so remove
   848			// it. This isn't safe during mark termination because
   849			// mark termination is consuming this list, but it's
   850			// also not necessary.
   851			dequeueRescan(gp)
   852		}
   853		gp.gcscanvalid = true
   854	}
   855	
   856	// Scan a stack frame: local variables and function arguments/results.
   857	//go:nowritebarrier
   858	func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {
   859	
   860		f := frame.fn
   861		targetpc := frame.continpc
   862		if targetpc == 0 {
   863			// Frame is dead.
   864			return
   865		}
   866		if _DebugGC > 1 {
   867			print("scanframe ", funcname(f), "\n")
   868		}
   869		if targetpc != f.entry {
   870			targetpc--
   871		}
   872		pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, cache)
   873		if pcdata == -1 {
   874			// We do not have a valid pcdata value but there might be a
   875			// stackmap for this function. It is likely that we are looking
   876			// at the function prologue, assume so and hope for the best.
   877			pcdata = 0
   878		}
   879	
   880		// Scan local variables if stack frame has been allocated.
   881		size := frame.varp - frame.sp
   882		var minsize uintptr
   883		switch sys.ArchFamily {
   884		case sys.ARM64:
   885			minsize = sys.SpAlign
   886		default:
   887			minsize = sys.MinFrameSize
   888		}
   889		if size > minsize {
   890			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
   891			if stkmap == nil || stkmap.n <= 0 {
   892				print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
   893				throw("missing stackmap")
   894			}
   895	
   896			// Locals bitmap information, scan just the pointers in locals.
   897			if pcdata < 0 || pcdata >= stkmap.n {
   898				// don't know where we are
   899				print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
   900				throw("scanframe: bad symbol table")
   901			}
   902			bv := stackmapdata(stkmap, pcdata)
   903			size = uintptr(bv.n) * sys.PtrSize
   904			scanblock(frame.varp-size, size, bv.bytedata, gcw)
   905		}
   906	
   907		// Scan arguments.
   908		if frame.arglen > 0 {
   909			var bv bitvector
   910			if frame.argmap != nil {
   911				bv = *frame.argmap
   912			} else {
   913				stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
   914				if stkmap == nil || stkmap.n <= 0 {
   915					print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
   916					throw("missing stackmap")
   917				}
   918				if pcdata < 0 || pcdata >= stkmap.n {
   919					// don't know where we are
   920					print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
   921					throw("scanframe: bad symbol table")
   922				}
   923				bv = stackmapdata(stkmap, pcdata)
   924			}
   925			scanblock(frame.argp, uintptr(bv.n)*sys.PtrSize, bv.bytedata, gcw)
   926		}
   927	}
   928	
   929	// queueRescan adds gp to the stack rescan list and clears
   930	// gp.gcscanvalid. The caller must own gp and ensure that gp isn't
   931	// already on the rescan list.
   932	func queueRescan(gp *g) {
   933		if debug.gcrescanstacks == 0 {
   934			// Clear gcscanvalid to keep assertions happy.
   935			//
   936			// TODO: Remove gcscanvalid entirely when we remove
   937			// stack rescanning.
   938			gp.gcscanvalid = false
   939			return
   940		}
   941	
   942		if gcphase == _GCoff {
   943			gp.gcscanvalid = false
   944			return
   945		}
   946		if gp.gcRescan != -1 {
   947			throw("g already on rescan list")
   948		}
   949	
   950		lock(&work.rescan.lock)
   951		gp.gcscanvalid = false
   952	
   953		// Recheck gcphase under the lock in case there was a phase change.
   954		if gcphase == _GCoff {
   955			unlock(&work.rescan.lock)
   956			return
   957		}
   958		if len(work.rescan.list) == cap(work.rescan.list) {
   959			throw("rescan list overflow")
   960		}
   961		n := len(work.rescan.list)
   962		gp.gcRescan = int32(n)
   963		work.rescan.list = work.rescan.list[:n+1]
   964		work.rescan.list[n].set(gp)
   965		unlock(&work.rescan.lock)
   966	}
   967	
   968	// dequeueRescan removes gp from the stack rescan list, if gp is on
   969	// the rescan list. The caller must own gp.
   970	func dequeueRescan(gp *g) {
   971		if debug.gcrescanstacks == 0 {
   972			return
   973		}
   974	
   975		if gp.gcRescan == -1 {
   976			return
   977		}
   978		if gcphase == _GCoff {
   979			gp.gcRescan = -1
   980			return
   981		}
   982	
   983		lock(&work.rescan.lock)
   984		if work.rescan.list[gp.gcRescan].ptr() != gp {
   985			throw("bad dequeueRescan")
   986		}
   987		// Careful: gp may itself be the last G on the list.
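      	// Remove gp in O(1) by moving the last entry into
      	// gp's slot; list order doesn't matter.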
   988		last := work.rescan.list[len(work.rescan.list)-1]
   989		work.rescan.list[gp.gcRescan] = last
   990		last.ptr().gcRescan = gp.gcRescan
   991		gp.gcRescan = -1
   992		work.rescan.list = work.rescan.list[:len(work.rescan.list)-1]
   993		unlock(&work.rescan.lock)
   994	}
   995	
   996	type gcDrainFlags int
   997	
   998	const (
   999		gcDrainUntilPreempt gcDrainFlags = 1 << iota
  1000		gcDrainNoBlock
  1001		gcDrainFlushBgCredit
  1002		gcDrainIdle
  1003	
  1004		// gcDrainBlock means neither gcDrainUntilPreempt nor
  1005		// gcDrainNoBlock. It is the default, but callers should use
  1006		// the constant for documentation purposes.
  1007		gcDrainBlock gcDrainFlags = 0
  1008	)
  1009	
  1010	// gcDrain scans roots and objects in work buffers, blackening grey
  1011	// objects until all roots and work buffers have been drained.
  1012	//
  1013	// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
  1014	// is set. This implies gcDrainNoBlock.
  1015	//
  1016	// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
  1017	// to do. This implies gcDrainNoBlock.
  1018	//
  1019	// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
  1020	// unable to get more work. Otherwise, it will block until all
  1021	// blocking calls are blocked in gcDrain.
  1022	//
  1023	// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
  1024	// credit to gcController.bgScanCredit every gcCreditSlack units of
  1025	// scan work.
  1026	//
  1027	//go:nowritebarrier
  1028	func gcDrain(gcw *gcWork, flags gcDrainFlags) {
  1029		if !writeBarrier.needed {
  1030			throw("gcDrain phase incorrect")
  1031		}
  1032	
  1033		gp := getg().m.curg
  1034		preemptible := flags&gcDrainUntilPreempt != 0
  1035		blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0
  1036		flushBgCredit := flags&gcDrainFlushBgCredit != 0
  1037		idle := flags&gcDrainIdle != 0
  1038	
  1039		initScanWork := gcw.scanWork
  1040		// idleCheck is the scan work at which to perform the next
  1041		// idle check with the scheduler.
  1042		idleCheck := initScanWork + idleCheckThreshold
  1043	
  1044		// Drain root marking jobs.
  1045		if work.markrootNext < work.markrootJobs {
  1046			for !(preemptible && gp.preempt) {
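      			// Atomically claim a root job; the value before
      			// the increment is this worker's job index.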
  1047				job := atomic.Xadd(&work.markrootNext, +1) - 1
  1048				if job >= work.markrootJobs {
  1049					break
  1050				}
  1051				markroot(gcw, job)
  1052				if idle && pollWork() {
  1053					goto done
  1054				}
  1055			}
  1056		}
  1057	
  1058		// Drain heap marking jobs.
  1059		for !(preemptible && gp.preempt) {
  1060			// Try to keep work available on the global queue. We used to
  1061			// check if there were waiting workers, but it's better to
  1062			// just keep work available than to make workers wait. In the
  1063			// worst case, we'll do O(log(_WorkbufSize)) unnecessary
  1064			// balances.
  1065			if work.full == 0 {
  1066				gcw.balance()
  1067			}
  1068	
  1069			var b uintptr
  1070			if blocking {
  1071				b = gcw.get()
  1072			} else {
  1073				b = gcw.tryGetFast()
  1074				if b == 0 {
  1075					b = gcw.tryGet()
  1076				}
  1077			}
  1078			if b == 0 {
  1079				// work barrier reached or tryGet failed.
  1080				break
  1081			}
  1082			scanobject(b, gcw)
  1083	
  1084			// Flush background scan work credit to the global
  1085			// account if we've accumulated enough locally so
  1086			// mutator assists can draw on it.
  1087			if gcw.scanWork >= gcCreditSlack {
  1088				atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
  1089				if flushBgCredit {
  1090					gcFlushBgCredit(gcw.scanWork - initScanWork)
  1091					initScanWork = 0
  1092				}
  1093				idleCheck -= gcw.scanWork
  1094				gcw.scanWork = 0
  1095	
  1096				if idle && idleCheck <= 0 {
  1097					idleCheck += idleCheckThreshold
  1098					if pollWork() {
  1099						break
  1100					}
  1101				}
  1102			}
  1103		}
  1104	
  1105		// In blocking mode, write barriers are not allowed after this
  1106		// point because we must preserve the condition that the work
  1107		// buffers are empty.
  1108	
  1109	done:
  1110		// Flush remaining scan work credit.
  1111		if gcw.scanWork > 0 {
  1112			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
  1113			if flushBgCredit {
  1114				gcFlushBgCredit(gcw.scanWork - initScanWork)
  1115			}
  1116			gcw.scanWork = 0
  1117		}
  1118	}
  1119	
  1120	// gcDrainN blackens grey objects until it has performed roughly
  1121	// scanWork units of scan work or the G is preempted. This is
  1122	// best-effort, so it may perform less work if it fails to get a work
  1123	// buffer. Otherwise, it will perform at least scanWork units of work, but
  1124	// may perform more because scanning is always done in whole object
  1125	// increments. It returns the amount of scan work performed.
  1126	//
  1127	// The caller goroutine must be in a preemptible state (e.g.,
  1128	// _Gwaiting) to prevent deadlocks during stack scanning. As a
  1129	// consequence, this must be called on the system stack.
  1130	//
  1131	//go:nowritebarrier
  1132	//go:systemstack
  1133	func gcDrainN(gcw *gcWork, scanWork int64) int64 {
  1134		if !writeBarrier.needed {
  1135			throw("gcDrainN phase incorrect")
  1136		}
  1137	
  1138		// There may already be scan work on the gcw, which we don't
  1139		// want to claim was done by this call.
  1140		workFlushed := -gcw.scanWork
  1141	
  1142		gp := getg().m.curg
  1143		for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
  1144			// See gcDrain comment.
  1145			if work.full == 0 {
  1146				gcw.balance()
  1147			}
  1148	
  1149			// This might be a good place to add prefetch code...
  1150			// if(wbuf.nobj > 4) {
  1151		//         PREFETCH(wbuf->obj[wbuf.nobj - 3]);
  1152			//  }
  1153			//
  1154			b := gcw.tryGetFast()
  1155			if b == 0 {
  1156				b = gcw.tryGet()
  1157			}
  1158	
  1159			if b == 0 {
  1160				// Try to do a root job.
  1161				//
  1162				// TODO: Assists should get credit for this
  1163				// work.
  1164				if work.markrootNext < work.markrootJobs {
  1165					job := atomic.Xadd(&work.markrootNext, +1) - 1
  1166					if job < work.markrootJobs {
  1167						markroot(gcw, job)
  1168						continue
  1169					}
  1170				}
  1171				// No heap or root jobs.
  1172				break
  1173			}
  1174			scanobject(b, gcw)
  1175	
  1176			// Flush background scan work credit.
  1177			if gcw.scanWork >= gcCreditSlack {
  1178				atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
  1179				workFlushed += gcw.scanWork
  1180				gcw.scanWork = 0
  1181			}
  1182		}
  1183	
  1184		// Unlike gcDrain, there's no need to flush remaining work
  1185		// here because this never flushes to bgScanCredit and
  1186		// gcw.dispose will flush any remaining work to scanWork.
  1187	
  1188		return workFlushed + gcw.scanWork
  1189	}
  1190	
  1191	// scanblock scans b as scanobject would, but using an explicit
  1192	// pointer bitmap instead of the heap bitmap.
  1193	//
  1194	// This is used to scan non-heap roots, so it does not update
  1195	// gcw.bytesMarked or gcw.scanWork.
  1196	//
  1197	//go:nowritebarrier
  1198	func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
  1199		// Use local copies of original parameters, so that a stack trace
  1200		// due to one of the throws below shows the original block
  1201		// base and extent.
  1202		b := b0
  1203		n := n0
  1204	
  1205		arena_start := mheap_.arena_start
  1206		arena_used := mheap_.arena_used
  1207	
  1208		for i := uintptr(0); i < n; {
  1209			// Find bits for the next word.
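      		// Each ptrmask byte covers 8 pointer-sized words,
      		// so index the mask by i/(PtrSize*8).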
  1210			bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
  1211			if bits == 0 {
  1212				i += sys.PtrSize * 8
  1213				continue
  1214			}
  1215			for j := 0; j < 8 && i < n; j++ {
  1216				if bits&1 != 0 {
  1217					// Same work as in scanobject; see comments there.
  1218					obj := *(*uintptr)(unsafe.Pointer(b + i))
  1219					if obj != 0 && arena_start <= obj && obj < arena_used {
  1220						if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
  1221							greyobject(obj, b, i, hbits, span, gcw, objIndex)
  1222						}
  1223					}
  1224				}
  1225				bits >>= 1
  1226				i += sys.PtrSize
  1227			}
  1228		}
  1229	}
  1230	
  1231	// scanobject scans the object starting at b, adding pointers to gcw.
  1232	// b must point to the beginning of a heap object or an oblet.
  1233	// scanobject consults the GC bitmap for the pointer mask and the
  1234	// spans for the size of the object.
  1235	//
  1236	//go:nowritebarrier
  1237	func scanobject(b uintptr, gcw *gcWork) {
  1238		// Note that arena_used may change concurrently during
  1239		// scanobject and hence scanobject may encounter a pointer to
  1240		// a newly allocated heap object that is *not* in
  1241		// [start,used). It will not mark this object; however, we
  1242		// know that it was just installed by a mutator, which means
  1243		// that mutator will execute a write barrier and take care of
  1244		// marking it. This is even more pronounced on relaxed memory
  1245		// architectures since we access arena_used without barriers
  1246		// or synchronization, but the same logic applies.
  1247		arena_start := mheap_.arena_start
  1248		arena_used := mheap_.arena_used
  1249	
  1250		// Find the bits for b and the size of the object at b.
  1251		//
  1252		// b is either the beginning of an object, in which case this
  1253		// is the size of the object to scan, or it points to an
  1254		// oblet, in which case we compute the size to scan below.
  1255		hbits := heapBitsForAddr(b)
  1256		s := spanOfUnchecked(b)
  1257		n := s.elemsize
  1258		if n == 0 {
  1259			throw("scanobject n == 0")
  1260		}
  1261	
  1262		if n > maxObletBytes {
  1263			// Large object. Break into oblets for better
  1264			// parallelism and lower latency.
  1265			if b == s.base() {
  1266				// It's possible this is a noscan object (not
  1267				// from greyobject, but from other code
  1268				// paths), in which case we must *not* enqueue
  1269				// oblets since their bitmaps will be
  1270				// uninitialized.
  1271				if !hbits.hasPointers(n) {
  1272					// Bypass the whole scan.
  1273					gcw.bytesMarked += uint64(n)
  1274					return
  1275				}
  1276	
  1277				// Enqueue the other oblets to scan later.
  1278				// Some oblets may be in b's scalar tail, but
  1279				// these will be marked as "no more pointers",
  1280				// so we'll drop out immediately when we go to
  1281				// scan those.
  1282				for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
  1283					if !gcw.putFast(oblet) {
  1284						gcw.put(oblet)
  1285					}
  1286				}
  1287			}
  1288	
  1289			// Compute the size of the oblet. Since this object
  1290			// must be a large object, s.base() is the beginning
  1291			// of the object.
  1292			n = s.base() + s.elemsize - b
  1293			if n > maxObletBytes {
  1294				n = maxObletBytes
  1295			}
  1296		}
  1297	
  1298		var i uintptr
  1299		for i = 0; i < n; i += sys.PtrSize {
  1300			// Find bits for this word.
  1301			if i != 0 {
  1302				// Avoid needless hbits.next() on last iteration.
  1303				hbits = hbits.next()
  1304			}
  1305			// Load bits once. See CL 22712 and issue 16973 for discussion.
  1306			bits := hbits.bits()
  1307			// During checkmarking, 1-word objects store the checkmark
  1308			// in the type bit for the one word. The only one-word objects
  1309			// are pointers, or else they'd be merged with other non-pointer
  1310			// data into larger allocations.
  1311			if i != 1*sys.PtrSize && bits&bitScan == 0 {
  1312				break // no more pointers in this object
  1313			}
  1314			if bits&bitPointer == 0 {
  1315				continue // not a pointer
  1316			}
  1317	
  1318			// Work here is duplicated in scanblock and above.
  1319			// If you make changes here, make changes there too.
  1320			obj := *(*uintptr)(unsafe.Pointer(b + i))
  1321	
  1322			// At this point we have extracted the next potential pointer.
  1323			// Check if it points into heap and not back at the current object.
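      		// The obj-b >= n test relies on unsigned wraparound:
      		// it is false only when obj lies within [b, b+n),
      		// i.e. points back into the object (or oblet) being
      		// scanned.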
  1324			if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
  1325				// Mark the object.
  1326				if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
  1327					greyobject(obj, b, i, hbits, span, gcw, objIndex)
  1328				}
  1329			}
  1330		}
  1331		gcw.bytesMarked += uint64(n)
  1332		gcw.scanWork += int64(i)
  1333	}
  1334	
  1335	// Shade the object if it isn't already.
  1336	// The object is not nil and known to be in the heap.
  1337	// Preemption must be disabled.
  1338	//go:nowritebarrier
  1339	func shade(b uintptr) {
  1340		if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0); obj != 0 {
  1341			gcw := &getg().m.p.ptr().gcw
  1342			greyobject(obj, 0, 0, hbits, span, gcw, objIndex)
  1343			if gcphase == _GCmarktermination || gcBlackenPromptly {
  1344				// Ps aren't allowed to cache work during mark
  1345				// termination.
  1346				gcw.dispose()
  1347			}
  1348		}
  1349	}
  1350	
  1351	// obj is the start of an object with mark mbits.
  1352	// If it isn't already marked, mark it and enqueue into gcw.
  1353	// base and off are for debugging only and could be removed.
  1354	//go:nowritebarrierrec
  1355	func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr) {
  1356		// obj should be start of allocation, and so must be at least pointer-aligned.
  1357		if obj&(sys.PtrSize-1) != 0 {
  1358			throw("greyobject: obj not pointer-aligned")
  1359		}
  1360		mbits := span.markBitsForIndex(objIndex)
  1361	
  1362		if useCheckmark {
  1363			if !mbits.isMarked() {
  1364				printlock()
  1365				print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
  1366				print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
  1367	
  1368				// Dump the source (base) object
  1369				gcDumpObject("base", base, off)
  1370	
  1371				// Dump the object
  1372				gcDumpObject("obj", obj, ^uintptr(0))
  1373	
  1374				throw("checkmark found unmarked object")
  1375			}
  1376			if hbits.isCheckmarked(span.elemsize) {
  1377				return
  1378			}
  1379			hbits.setCheckmarked(span.elemsize)
  1380			if !hbits.isCheckmarked(span.elemsize) {
  1381				throw("setCheckmarked and isCheckmarked disagree")
  1382			}
  1383		} else {
  1384			if debug.gccheckmark > 0 && span.isFree(objIndex) {
  1385				print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
  1386				gcDumpObject("base", base, off)
  1387				gcDumpObject("obj", obj, ^uintptr(0))
  1388				throw("marking free object")
  1389			}
  1390	
  1391			// If marked we have nothing to do.
  1392			if mbits.isMarked() {
  1393				return
  1394			}
  1395			// mbits.setMarked() // Avoid extra call overhead with manual inlining.
  1396			atomic.Or8(mbits.bytep, mbits.mask)
  1397			// If this is a noscan object, fast-track it to black
  1398			// instead of greying it.
  1399			if !hbits.hasPointers(span.elemsize) {
  1400				gcw.bytesMarked += uint64(span.elemsize)
  1401				return
  1402			}
  1403		}
  1404	
  1405		// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
  1406		// seems like a nice optimization that can be added back in.
  1407		// There needs to be time between the PREFETCH and the use.
  1408		// Previously we put the obj in an 8 element buffer that is drained at a rate
  1409		// to give the PREFETCH time to do its work.
  1410		// Use of PREFETCHNTA might be more appropriate than PREFETCH
  1411		if !gcw.putFast(obj) {
  1412			gcw.put(obj)
  1413		}
  1414	}
  1415	
  1416	// gcDumpObject dumps the contents of obj for debugging and marks the
  1417	// field at byte offset off in obj.
  1418	func gcDumpObject(label string, obj, off uintptr) {
  1419		if obj < mheap_.arena_start || obj >= mheap_.arena_used {
  1420			print(label, "=", hex(obj), " is not in the Go heap\n")
  1421			return
  1422		}
  1423		k := obj >> _PageShift
  1424		x := k
  1425		x -= mheap_.arena_start >> _PageShift
  1426		s := mheap_.spans[x]
  1427		print(label, "=", hex(obj), " k=", hex(k))
  1428		if s == nil {
  1429			print(" s=nil\n")
  1430			return
  1431		}
  1432		print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, " s.state=")
  1433		if 0 <= s.state && int(s.state) < len(mSpanStateNames) {
  1434			print(mSpanStateNames[s.state], "\n")
  1435		} else {
  1436			print("unknown(", s.state, ")\n")
  1437		}
  1438	
  1439		skipped := false
  1440		size := s.elemsize
  1441		if s.state == _MSpanStack && size == 0 {
  1442			// We're printing something from a stack frame. We
  1443			// don't know how big it is, so just show up to an
  1444		// don't know how big it is, so just show up to and
  1445			size = off + sys.PtrSize
  1446		}
  1447		for i := uintptr(0); i < size; i += sys.PtrSize {
  1448			// For big objects, just print the beginning (because
  1449			// that usually hints at the object's type) and the
  1450			// fields around off.
  1451			if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
  1452				skipped = true
  1453				continue
  1454			}
  1455			if skipped {
  1456				print(" ...\n")
  1457				skipped = false
  1458			}
  1459			print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
  1460			if i == off {
  1461				print(" <==")
  1462			}
  1463			print("\n")
  1464		}
  1465		if skipped {
  1466			print(" ...\n")
  1467		}
  1468	}
  1469	
  1470	// gcmarknewobject marks a newly allocated object black. obj must
  1471	// not contain any non-nil pointers.
  1472	//
  1473	// This is nosplit so it can manipulate a gcWork without preemption.
  1474	//
  1475	//go:nowritebarrier
  1476	//go:nosplit
  1477	func gcmarknewobject(obj, size, scanSize uintptr) {
  1478		if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
  1479			throw("gcmarknewobject called while doing checkmark")
  1480		}
  1481		markBitsForAddr(obj).setMarked()
  1482		gcw := &getg().m.p.ptr().gcw
  1483		gcw.bytesMarked += uint64(size)
  1484		gcw.scanWork += int64(scanSize)
  1485		if gcBlackenPromptly {
  1486			// There shouldn't be anything in the work queue, but
  1487			// we still need to flush stats.
  1488			gcw.dispose()
  1489		}
  1490	}
  1491	
  1492	// gcMarkTinyAllocs greys all active tiny alloc blocks.
  1493	//
  1494	// The world must be stopped.
  1495	func gcMarkTinyAllocs() {
  1496		for _, p := range &allp {
  1497			if p == nil || p.status == _Pdead {
  1498				break
  1499			}
  1500			c := p.mcache
  1501			if c == nil || c.tiny == 0 {
  1502				continue
  1503			}
  1504			_, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0)
  1505			gcw := &p.gcw
  1506			greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex)
  1507			if gcBlackenPromptly {
  1508				gcw.dispose()
  1509			}
  1510		}
  1511	}
  1512	
  1513	// Checkmarking
  1514	
  1515	// To help debug the concurrent GC we remark with the world
  1516	// stopped ensuring that any object encountered has their normal
  1517	// stopped, ensuring that any object encountered has its normal
  1518	// pattern to indicate the object is marked. The following pattern
  1519	// uses the upper two bits in the object's boundary nibble.
  1520	// 01: scalar  not marked
  1521	// 10: pointer not marked
  1522	// 11: pointer     marked
  1523	// 00: scalar      marked
  1524	// XORing with 01 will flip the pattern from marked to unmarked and vice versa.
  1525	// The higher bit is 1 for pointers and 0 for scalars, whether the object
  1526	// is marked or not.
  1527	// The first nibble no longer holds the typeDead pattern indicating that
  1528	// there are no more pointers in the object. This information is held
  1529	// in the second nibble.
  1530	
  1531	// If useCheckmark is true, marking of an object uses the
  1532	// checkmark bits (encoding above) instead of the standard
  1533	// mark bits.
  1534	var useCheckmark = false
  1535	
  1536	//go:nowritebarrier
  1537	func initCheckmarks() {
  1538		useCheckmark = true
  1539		for _, s := range mheap_.allspans {
  1540			if s.state == _MSpanInUse {
  1541				heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
  1542			}
  1543		}
  1544	}
  1545	
  1546	func clearCheckmarks() {
  1547		useCheckmark = false
  1548		for _, s := range mheap_.allspans {
  1549			if s.state == _MSpanInUse {
  1550				heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
  1551			}
  1552		}
  1553	}
  1554	
