
Source file src/runtime/mgc.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
     6	// It has gotten completely out of control.
     7	
     8	// Garbage collector (GC).
     9	//
    10	// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows multiple
    11	// GC threads to run in parallel. It is a concurrent mark-and-sweep collector that uses a write barrier. It is
    12	// non-generational and non-compacting. Allocation is done using size segregated per P allocation
    13	// areas to minimize fragmentation while eliminating locks in the common case.
    14	//
    15	// The algorithm decomposes into several steps.
    16	// This is a high level description of the algorithm being used. For an overview of GC a good
    17	// place to start is Richard Jones' gchandbook.org.
    18	//
    19	// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
    20	// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
    21	// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978),
    22	// 966-975.
    23	// For journal quality proofs that these steps are complete, correct, and terminate see
    24	// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
    25	// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
    26	//
    27	// TODO(austin): The rest of this comment is woefully out of date and
    28	// needs to be rewritten. There is no distinct scan phase any more and
    29	// we allocate black during GC.
    30	//
    31	//  0. Set phase = GCscan from GCoff.
    32	//  1. Wait for all P's to acknowledge phase change.
    33	//         At this point all goroutines have passed through a GC safepoint and
    34	//         know we are in the GCscan phase.
    35	//  2. GC scans all goroutine stacks, marks and enqueues all encountered pointers
    36	//       (marking avoids most duplicate enqueuing but races may produce benign duplication).
    37	//       Preempted goroutines are scanned before the P schedules the next goroutine.
    38	//  3. Set phase = GCmark.
    39	//  4. Wait for all P's to acknowledge phase change.
    40	//  5. Now the write barrier marks and enqueues the targets of black-to-white, grey-to-white, and white-to-white pointer writes.
    41	//       Malloc still allocates white (non-marked) objects.
    42	//  6. Meanwhile GC transitively walks the heap marking reachable objects.
    43	//  7. When GC finishes marking heap, it preempts P's one-by-one and
    44	//       retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
    45	//       currently scheduled on the P).
    46	//  8. Once the GC has exhausted all available marking work it sets phase = marktermination.
    47	//  9. Wait for all P's to acknowledge phase change.
    48	// 10. Malloc now allocates black objects, so number of unmarked reachable objects
    49	//        monotonically decreases.
    50	// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet
    51	//        reachable objects.
    52	// 12. When GC completes a full cycle over P's and discovers no new grey
    53	//         objects, (which means all reachable objects are marked) set phase = GCoff.
    54	// 13. Wait for all P's to acknowledge phase change.
    55	// 14. Now malloc allocates white (but sweeps spans before use).
    56	//         Write barrier becomes nop.
    57	// 15. GC does background sweeping, see description below.
    58	// 16. When sufficient allocation has taken place replay the sequence starting at 0 above,
    59	//         see discussion of GC rate below.
    60	
    61	// Changing phases.
    62	// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
    63	// All phase action must be benign in the presence of a change.
    64	// Starting with GCoff
    65	// GCoff to GCscan
    66	//     GCscan scans stacks and globals, greying them, and never marks an object black.
    67	//     Once all the P's are aware of the new phase they will scan gs on preemption.
    68	//     This means that the scanning of preempted gs can't start until all the Ps
    69	//     have acknowledged.
    70	//     When a stack is scanned, this phase also installs stack barriers to
    71	//     track how much of the stack has been active.
    72	//     This transition enables write barriers because stack barriers
    73	//     assume that writes to higher frames will be tracked by write
    74	//     barriers. Technically this only needs write barriers for writes
    75	//     to stack slots, but we enable write barriers in general.
    76	// GCscan to GCmark
    77	//     In GCmark, work buffers are drained until there are no more
    78	//     pointers to scan.
    79	//     No scanning of objects (making them black) can happen until all
    80	//     Ps have enabled the write barrier, but that already happened in
    81	//     the transition to GCscan.
    82	// GCmark to GCmarktermination
    83	//     The only change here is that we start allocating black so the Ps must acknowledge
    84	//     the change before we begin the termination algorithm
    85	// GCmarktermination to GCsweep
    86	//     Objects currently on the freelist must be marked black for this to work.
    87	//     Are things on the free lists black or white? How does the sweep phase work?
    88	
    89	// Concurrent sweep.
    90	//
    91	// The sweep phase proceeds concurrently with normal program execution.
    92	// The heap is swept span-by-span both lazily (when a goroutine needs another span)
    93	// and concurrently in a background goroutine (this helps programs that are not CPU bound).
    94	// At the end of STW mark termination all spans are marked as "needs sweeping".
    95	//
    96	// The background sweeper goroutine simply sweeps spans one-by-one.
    97	//
    98	// To avoid requesting more OS memory while there are unswept spans, when a
    99	// goroutine needs another span, it first attempts to reclaim that much memory
   100	// by sweeping. When a goroutine needs to allocate a new small-object span, it
   101	// sweeps small-object spans for the same object size until it frees at least
   102	// one object. When a goroutine needs to allocate a large-object span from the heap,
   103	// it sweeps spans until it frees at least that many pages into the heap. There is
   104	// one case where this may not suffice: if a goroutine sweeps and frees two
   105	// nonadjacent one-page spans to the heap, it will allocate a new two-page
   106	// span, but there can still be other one-page unswept spans which could be
   107	// combined into a two-page span.
   108	//
   109	// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
   110	// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
   111	// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
   112	// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
   113	// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
   114	// The finalizer goroutine is kicked off only when all spans are swept.
   115	// When the next GC starts, it sweeps all not-yet-swept spans (if any).
   116	
   117	// GC rate.
   118	// Next GC is after we've allocated an extra amount of memory proportional to
   119	// the amount already in use. The proportion is controlled by the GOGC environment variable
   120	// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
   121	// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
   122	// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
   123	// (and also the amount of extra memory used).
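	//
	// Illustrative sketch only (hypothetical names; the real trigger below
	// also folds in heapminimum and the pacer's triggerRatio):
	//
	//	// With GOGC=100 and 4M in use: 4M + 4M*100/100 = 8M.
	//	nextGC := heapInUse + heapInUse*gogc/100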
   124	
   125	package runtime
   126	
   127	import (
   128		"runtime/internal/atomic"
   129		"runtime/internal/sys"
   130		"unsafe"
   131	)
   132	
   133	const (
   134		_DebugGC         = 0
   135		_ConcurrentSweep = true
   136		_FinBlockSize    = 4 * 1024
   137	
   138		// sweepMinHeapDistance is a lower bound on the heap distance
   139		// (in bytes) reserved for concurrent sweeping between GC
   140		// cycles. This will be scaled by gcpercent/100.
   141		sweepMinHeapDistance = 1024 * 1024
   142	)
   143	
   144	// heapminimum is the minimum heap size at which to trigger GC.
   145	// For small heaps, this overrides the usual GOGC*live set rule.
   146	//
   147	// When there is a very small live set but a lot of allocation, simply
   148	// collecting when the heap reaches GOGC*live results in many GC
   149	// cycles and high total per-GC overhead. This minimum amortizes this
   150	// per-GC overhead while keeping the heap reasonably small.
   151	//
   152	// During initialization this is set to 4MB*GOGC/100. In the case of
   153	// GOGC==0, this will set heapminimum to 0, resulting in constant
   154	// collection even when the heap size is small, which is useful for
   155	// debugging.
   156	var heapminimum uint64 = defaultHeapMinimum
   157	
   158	// defaultHeapMinimum is the value of heapminimum for GOGC==100.
   159	const defaultHeapMinimum = 4 << 20
   160	
   161	// Initialized from $GOGC.  GOGC=off means no GC.
   162	var gcpercent int32
   163	
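	// gcinit runs during runtime start-up. It sanity-checks the workbuf
	// size, applies the initial GOGC setting, builds the pointer bitmaps
	// for each module's data and BSS segments, and initializes the GC
	// trigger and the state-transition semaphores.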
   164	func gcinit() {
   165		if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
   166			throw("size of Workbuf is suboptimal")
   167		}
   168	
   169		_ = setGCPercent(readgogc())
   170		for datap := &firstmoduledata; datap != nil; datap = datap.next {
   171			datap.gcdatamask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcdata)), datap.edata-datap.data)
   172			datap.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(datap.gcbss)), datap.ebss-datap.bss)
   173		}
   174		memstats.next_gc = heapminimum
   175		work.startSema = 1
   176		work.markDoneSema = 1
   177	}
   178	
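	// readgogc parses the GOGC environment variable: 100 if it is unset,
	// -1 if it is "off" (disabling GC), and its integer value otherwise.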
   179	func readgogc() int32 {
   180		p := gogetenv("GOGC")
   181		if p == "" {
   182			return 100
   183		}
   184		if p == "off" {
   185			return -1
   186		}
   187		return int32(atoi(p))
   188	}
   189	
   190	// gcenable is called after the bulk of the runtime initialization,
   191	// just before we're about to start letting user code run.
   192	// It kicks off the background sweeper goroutine and enables GC.
   193	func gcenable() {
   194		c := make(chan int, 1)
   195		go bgsweep(c)
   196		<-c
   197		memstats.enablegc = true // now that runtime is initialized, GC is okay
   198	}
   199	
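	// setGCPercent installs a new GOGC value and returns the previous one.
	// Any negative input is treated as -1 (GC off). It also rescales
	// heapminimum and caps gcController.triggerRatio at the new goal ratio.
	// It is linked to runtime/debug.setGCPercent.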
   200	//go:linkname setGCPercent runtime/debug.setGCPercent
   201	func setGCPercent(in int32) (out int32) {
   202		lock(&mheap_.lock)
   203		out = gcpercent
   204		if in < 0 {
   205			in = -1
   206		}
   207		gcpercent = in
   208		heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100
   209		if gcController.triggerRatio > float64(gcpercent)/100 {
   210			gcController.triggerRatio = float64(gcpercent) / 100
   211		}
   212		unlock(&mheap_.lock)
   213		return out
   214	}
   215	
   216	// Garbage collector phase.
   217	// It indicates to the write barrier and synchronization code which task to perform.
   218	var gcphase uint32
   219	
   220	// The compiler knows about this variable.
   221	// If you change it, you must change the compiler too.
   222	var writeBarrier struct {
   223		enabled bool   // compiler emits a check of this before calling write barrier
   224		needed  bool   // whether we need a write barrier for current GC phase
   225		cgo     bool   // whether we need a write barrier for a cgo check
   226		alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load
   227	}
   228	
   229	// gcBlackenEnabled is 1 if mutator assists and background mark
   230	// workers are allowed to blacken objects. This must only be set when
   231	// gcphase == _GCmark.
   232	var gcBlackenEnabled uint32
   233	
   234	// gcBlackenPromptly indicates that optimizations that may
   235	// hide work from the global work queue should be disabled.
   236	//
   237	// If gcBlackenPromptly is true, per-P gcWork caches should
   238	// be flushed immediately and new objects should be allocated black.
   239	//
   240	// There is a tension between allocating objects white and
   241	// allocating them black. If white and the objects die before being
   242	// marked they can be collected during this GC cycle. On the other
   243	// hand allocating them black will reduce _GCmarktermination latency
   244	// since more work is done in the mark phase. This tension is resolved
   245	// by allocating white until the mark phase is approaching its end and
   246	// then allocating black for the remainder of the mark phase.
   247	var gcBlackenPromptly bool
   248	
   249	const (
   250		_GCoff             = iota // GC not running; sweeping in background, write barrier disabled
   251		_GCmark                   // GC marking roots and workbufs: allocate black, write barrier ENABLED
   252		_GCmarktermination        // GC mark termination: allocate black, P's help GC, write barrier ENABLED
   253	)
   254	
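	// setGCPhase publishes a new value of gcphase and recomputes the write
	// barrier flags: the barrier is needed exactly during _GCmark and
	// _GCmarktermination, and enabled whenever it is needed or cgo checks
	// require it.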
   255	//go:nosplit
   256	func setGCPhase(x uint32) {
   257		atomic.Store(&gcphase, x)
   258		writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination
   259		writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo
   260	}
   261	
   262	// gcMarkWorkerMode represents the mode that a concurrent mark worker
   263	// should operate in.
   264	//
   265	// Concurrent marking happens through four different mechanisms. One
   266	// is mutator assists, which happen in response to allocations and are
   267	// not scheduled. The other three are variations in the per-P mark
   268	// workers and are distinguished by gcMarkWorkerMode.
   269	type gcMarkWorkerMode int
   270	
   271	const (
   272		// gcMarkWorkerDedicatedMode indicates that the P of a mark
   273		// worker is dedicated to running that mark worker. The mark
   274		// worker should run without preemption.
   275		gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota
   276	
   277		// gcMarkWorkerFractionalMode indicates that a P is currently
   278		// running the "fractional" mark worker. The fractional worker
   279		// is necessary when GOMAXPROCS*gcGoalUtilization is not an
   280		// integer. The fractional worker should run until it is
   281		// preempted and will be scheduled to pick up the fractional
   282		// part of GOMAXPROCS*gcGoalUtilization.
   283		gcMarkWorkerFractionalMode
   284	
   285		// gcMarkWorkerIdleMode indicates that a P is running the mark
   286		// worker because it has nothing else to do. The idle worker
   287		// should run until it is preempted and account its time
   288		// against gcController.idleMarkTime.
   289		gcMarkWorkerIdleMode
   290	)
   291	
   292	// gcController implements the GC pacing controller that determines
   293	// when to trigger concurrent garbage collection and how much marking
   294	// work to do in mutator assists and background marking.
   295	//
   296	// It uses a feedback control algorithm to adjust the memstats.next_gc
   297	// trigger based on the heap growth and GC CPU utilization each cycle.
   298	// This algorithm optimizes for heap growth to match GOGC and for CPU
   299	// utilization between assist and background marking to be 25% of
   300	// GOMAXPROCS. The high-level design of this algorithm is documented
   301	// at https://golang.org/s/go15gcpacing.
   302	var gcController = gcControllerState{
   303		// Initial trigger ratio guess.
   304		triggerRatio: 7 / 8.0,
   305	}
   306	
   307	type gcControllerState struct {
   308		// scanWork is the total scan work performed this cycle. This
   309		// is updated atomically during the cycle. Updates occur in
   310		// bounded batches, since it is both written and read
   311		// throughout the cycle. At the end of the cycle, this is how
   312		// much of the retained heap is scannable.
   313		//
   314		// Currently this is the bytes of heap scanned. For most uses,
   315		// this is an opaque unit of work, but for estimation the
   316		// definition is important.
   317		scanWork int64
   318	
   319		// bgScanCredit is the scan work credit accumulated by the
   320		// concurrent background scan. This credit is accumulated by
   321		// the background scan and stolen by mutator assists. This is
   322		// updated atomically. Updates occur in bounded batches, since
   323		// it is both written and read throughout the cycle.
   324		bgScanCredit int64
   325	
   326		// assistTime is the nanoseconds spent in mutator assists
   327		// during this cycle. This is updated atomically. Updates
   328		// occur in bounded batches, since it is both written and read
   329		// throughout the cycle.
   330		assistTime int64
   331	
   332		// dedicatedMarkTime is the nanoseconds spent in dedicated
   333		// mark workers during this cycle. This is updated atomically
   334		// at the end of the concurrent mark phase.
   335		dedicatedMarkTime int64
   336	
   337		// fractionalMarkTime is the nanoseconds spent in the
   338		// fractional mark worker during this cycle. This is updated
   339		// atomically throughout the cycle and will be up-to-date if
   340		// the fractional mark worker is not currently running.
   341		fractionalMarkTime int64
   342	
   343		// idleMarkTime is the nanoseconds spent in idle marking
   344		// during this cycle. This is updated atomically throughout
   345		// the cycle.
   346		idleMarkTime int64
   347	
   348		// markStartTime is the absolute start time in nanoseconds
   349		// that assists and background mark workers started.
   350		markStartTime int64
   351	
   352		// heapGoal is the goal memstats.heap_live for when this cycle
   353		// ends. This is computed at the beginning of each cycle.
   354		heapGoal uint64
   355	
   356		// dedicatedMarkWorkersNeeded is the number of dedicated mark
   357		// workers that need to be started. This is computed at the
   358		// beginning of each cycle and decremented atomically as
   359		// dedicated mark workers get started.
   360		dedicatedMarkWorkersNeeded int64
   361	
   362		// assistWorkPerByte is the ratio of scan work to allocated
   363		// bytes that should be performed by mutator assists. This is
   364		// computed at the beginning of each cycle and updated every
   365		// time heap_scan is updated.
   366		assistWorkPerByte float64
   367	
   368		// assistBytesPerWork is 1/assistWorkPerByte.
   369		assistBytesPerWork float64
   370	
   371		// fractionalUtilizationGoal is the fraction of wall clock
   372		// time that should be spent in the fractional mark worker.
   373		// For example, if the overall mark utilization goal is 25%
   374		// and GOMAXPROCS is 6, one P will be a dedicated mark worker
   375		// and this will be set to 0.5 so that 50% of the time some P
   376		// is in a fractional mark worker. This is computed at the
   377		// beginning of each cycle.
   378		fractionalUtilizationGoal float64
   379	
   380		// triggerRatio is the heap growth ratio at which the garbage
   381		// collection cycle should start. E.g., if this is 0.6, then
   382		// GC should start when the live heap has reached 1.6 times
   383		// the heap size marked by the previous cycle. This is updated
   384	// at the end of each cycle.
   385		triggerRatio float64
   386	
   387		_ [sys.CacheLineSize]byte
   388	
   389		// fractionalMarkWorkersNeeded is the number of fractional
   390		// mark workers that need to be started. This is either 0 or
   391		// 1. This is potentially updated atomically at every
   392		// scheduling point (hence it gets its own cache line).
   393		fractionalMarkWorkersNeeded int64
   394	
   395		_ [sys.CacheLineSize]byte
   396	}
   397	
   398	// startCycle resets the GC controller's state and computes estimates
   399	// for a new GC cycle. The caller must hold worldsema.
   400	func (c *gcControllerState) startCycle() {
   401		c.scanWork = 0
   402		c.bgScanCredit = 0
   403		c.assistTime = 0
   404		c.dedicatedMarkTime = 0
   405		c.fractionalMarkTime = 0
   406		c.idleMarkTime = 0
   407	
   408		// If this is the first GC cycle or we're operating on a very
   409		// small heap, fake heap_marked so it looks like next_gc is
   410		// the appropriate growth from heap_marked, even though the
   411		// real heap_marked may not have a meaningful value (on the
   412		// first cycle) or may be much smaller (resulting in a large
   413		// error response).
   414		if memstats.next_gc <= heapminimum {
   415			memstats.heap_marked = uint64(float64(memstats.next_gc) / (1 + c.triggerRatio))
   416			memstats.heap_reachable = memstats.heap_marked
   417		}
   418	
   419		// Compute the heap goal for this cycle
   420		c.heapGoal = memstats.heap_reachable + memstats.heap_reachable*uint64(gcpercent)/100
   421	
   422		// Ensure that the heap goal is at least a little larger than
   423		// the current live heap size. This may not be the case if GC
   424		// start is delayed or if the allocation that pushed heap_live
   425		// over next_gc is large or if the trigger is really close to
   426		// GOGC. Assist is proportional to this distance, so enforce a
   427		// minimum distance, even if it means going over the GOGC goal
   428		// by a tiny bit.
   429		if c.heapGoal < memstats.heap_live+1024*1024 {
   430			c.heapGoal = memstats.heap_live + 1024*1024
   431		}
   432	
   433		// Compute the total mark utilization goal and divide it among
   434		// dedicated and fractional workers.
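		// For example, with GOMAXPROCS=6 and a 25% goal, the total is 1.5
		// Ps: one dedicated worker plus a fractional worker targeting 50%
		// of one P (mirroring the fractionalUtilizationGoal example above).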
   435		totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization
   436		c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal)
   437		c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded)
   438		if c.fractionalUtilizationGoal > 0 {
   439			c.fractionalMarkWorkersNeeded = 1
   440		} else {
   441			c.fractionalMarkWorkersNeeded = 0
   442		}
   443	
   444		// Clear per-P state
   445		for _, p := range &allp {
   446			if p == nil {
   447				break
   448			}
   449			p.gcAssistTime = 0
   450		}
   451	
   452		// Compute initial values for controls that are updated
   453		// throughout the cycle.
   454		c.revise()
   455	
   456		if debug.gcpacertrace > 0 {
   457			print("pacer: assist ratio=", c.assistWorkPerByte,
   458				" (scan ", memstats.heap_scan>>20, " MB in ",
   459				work.initialHeapLive>>20, "->",
   460				c.heapGoal>>20, " MB)",
   461				" workers=", c.dedicatedMarkWorkersNeeded,
   462				"+", c.fractionalMarkWorkersNeeded, "\n")
   463		}
   464	}
   465	
   466	// revise updates the assist ratio during the GC cycle to account for
   467	// improved estimates. This should be called either under STW or
   468	// whenever memstats.heap_scan or memstats.heap_live is updated (with
   469	// mheap_.lock held).
   470	//
   471	// It should only be called when gcBlackenEnabled != 0 (because this
   472	// is when assists are enabled and the necessary statistics are
   473	// available).
   474	//
   475	// TODO: Consider removing the periodic controller update altogether.
   476	// Since we switched to allocating black, in theory we shouldn't have
   477	// to change the assist ratio. However, this is still a useful hook
   478	// that we've found many uses for when experimenting.
   479	func (c *gcControllerState) revise() {
   480		// Compute the expected scan work remaining.
   481		//
   482		// Note that we currently count allocations during GC as both
   483		// scannable heap (heap_scan) and scan work completed
   484		// (scanWork), so this difference won't be changed by
   485		// allocations during GC.
   486		//
   487		// This particular estimate is a strict upper bound on the
   488		// possible remaining scan work for the current heap.
   489		// You might consider dividing this by 2 (or by
   490		// (100+GOGC)/100) to counter this over-estimation, but
   491		// benchmarks show that this has almost no effect on mean
   492		// mutator utilization, heap size, or assist time and it
   493		// introduces the danger of under-estimating and letting the
   494		// mutator outpace the garbage collector.
   495		scanWorkExpected := int64(memstats.heap_scan) - c.scanWork
   496		if scanWorkExpected < 1000 {
   497			// We set a somewhat arbitrary lower bound on
   498			// remaining scan work since if we aim a little high,
   499			// we can miss by a little.
   500			//
   501			// We *do* need to enforce that this is at least 1,
   502			// since marking is racy and double-scanning objects
   503			// may legitimately make the expected scan work
   504			// negative.
   505			scanWorkExpected = 1000
   506		}
   507	
   508		// Compute the heap distance remaining.
   509		heapDistance := int64(c.heapGoal) - int64(memstats.heap_live)
   510		if heapDistance <= 0 {
   511			// This shouldn't happen, but if it does, avoid
   512			// dividing by zero or setting the assist negative.
   513			heapDistance = 1
   514		}
   515	
   516		// Compute the mutator assist ratio so by the time the mutator
   517		// allocates the remaining heap bytes up to next_gc, it will
   518		// have done (or stolen) the remaining amount of scan work.
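		// For example, if 1 MB of scan work is expected to remain and the
		// heap may grow another 4 MB before reaching the goal, assists must
		// perform about 0.25 bytes of scan work per byte allocated.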
   519		c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance)
   520		c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected)
   521	}
   522	
   523	// endCycle updates the GC controller state at the end of the
   524	// concurrent part of the GC cycle.
   525	func (c *gcControllerState) endCycle() {
   526		h_t := c.triggerRatio // For debugging
   527	
   528		// Proportional response gain for the trigger controller. Must
   529		// be in [0, 1]. Lower values smooth out transient effects but
   530		// take longer to respond to phase changes. Higher values
   531		// react to phase changes quickly, but are more affected by
   532		// transient changes. Values near 1 may be unstable.
   533		const triggerGain = 0.5
   534	
   535		// Compute next cycle trigger ratio. First, this computes the
   536		// "error" for this cycle; that is, how far off the trigger
   537		// was from what it should have been, accounting for both heap
   538		// growth and GC CPU utilization. We compute the actual heap
   539		// growth during this cycle and scale that by how far off from
   540		// the goal CPU utilization we were (to estimate the heap
   541		// growth if we had the desired CPU utilization). The
   542		// difference between this estimate and the GOGC-based goal
   543		// heap growth is the error.
   544		//
   545		// TODO(austin): next_gc is based on heap_reachable, not
   546		// heap_marked, which means the actual growth ratio
   547		// technically isn't comparable to the trigger ratio.
   548		goalGrowthRatio := float64(gcpercent) / 100
   549		actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1
   550		assistDuration := nanotime() - c.markStartTime
   551	
   552		// Assume background mark hit its utilization goal.
   553		utilization := gcGoalUtilization
   554		// Add assist utilization; avoid divide by zero.
   555		if assistDuration > 0 {
   556			utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs))
   557		}
   558	
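		// In the notation of the gcpacertrace print below, this is
		// e = h_g - h_t - (u_a/u_g)*(h_a - h_t).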
   559		triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio)
   560	
   561		// Finally, we adjust the trigger for next time by this error,
   562		// damped by the proportional gain.
   563		c.triggerRatio += triggerGain * triggerError
   564		if c.triggerRatio < 0 {
   565			// This can happen if the mutator is allocating very
   566			// quickly or the GC is scanning very slowly.
   567			c.triggerRatio = 0
   568		} else if c.triggerRatio > goalGrowthRatio*0.95 {
   569			// Ensure there's always a little margin so that the
   570			// mutator assist ratio isn't infinity.
   571			c.triggerRatio = goalGrowthRatio * 0.95
   572		}
   573	
   574		if debug.gcpacertrace > 0 {
   575			// Print controller state in terms of the design
   576			// document.
   577			H_m_prev := memstats.heap_marked
   578			H_T := memstats.next_gc
   579			h_a := actualGrowthRatio
   580			H_a := memstats.heap_live
   581			h_g := goalGrowthRatio
   582			H_g := int64(float64(H_m_prev) * (1 + h_g))
   583			u_a := utilization
   584			u_g := gcGoalUtilization
   585			W_a := c.scanWork
   586			print("pacer: H_m_prev=", H_m_prev,
   587				" h_t=", h_t, " H_T=", H_T,
   588				" h_a=", h_a, " H_a=", H_a,
   589				" h_g=", h_g, " H_g=", H_g,
   590				" u_a=", u_a, " u_g=", u_g,
   591				" W_a=", W_a,
   592				" goalΔ=", goalGrowthRatio-h_t,
   593				" actualΔ=", h_a-h_t,
   594				" u_a/u_g=", u_a/u_g,
   595				"\n")
   596		}
   597	}
   598	
   599	// enlistWorker encourages another dedicated mark worker to start on
   600	// another P if there are spare worker slots. It is used by putfull
   601	// when more work is made available.
   602	//
   603	//go:nowritebarrier
   604	func (c *gcControllerState) enlistWorker() {
   605		if c.dedicatedMarkWorkersNeeded <= 0 {
   606			return
   607		}
   608		// Pick a random other P to preempt.
   609		if gomaxprocs <= 1 {
   610			return
   611		}
   612		gp := getg()
   613		if gp == nil || gp.m == nil || gp.m.p == 0 {
   614			return
   615		}
   616		myID := gp.m.p.ptr().id
   617		for tries := 0; tries < 5; tries++ {
   618			id := int32(fastrand1() % uint32(gomaxprocs-1))
   619			if id >= myID {
   620				id++
   621			}
   622			p := allp[id]
   623			if p.status != _Prunning {
   624				continue
   625			}
   626			if preemptone(p) {
   627				return
   628			}
   629		}
   630	}
   631	
   632	// findRunnableGCWorker returns the background mark worker for _p_ if it
   633	// should be run. This must only be called when gcBlackenEnabled != 0.
   634	func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
   635		if gcBlackenEnabled == 0 {
   636			throw("gcControllerState.findRunnable: blackening not enabled")
   637		}
   638		if _p_.gcBgMarkWorker == 0 {
   639			// The mark worker associated with this P is blocked
   640			// performing a mark transition. We can't run it
   641			// because it may be on some other run or wait queue.
   642			return nil
   643		}
   644	
   645		if !gcMarkWorkAvailable(_p_) {
   646			// No work to be done right now. This can happen at
   647			// the end of the mark phase when there are still
   648			// assists tapering off. Don't bother running a worker
   649			// now because it'll just return immediately.
   650			return nil
   651		}
   652	
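		// decIfPositive atomically takes one token from *ptr, returning
		// true on success; if a racing decrement pushed the count below
		// zero, it restores the token and reports failure.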
   653		decIfPositive := func(ptr *int64) bool {
   654			if *ptr > 0 {
   655				if atomic.Xaddint64(ptr, -1) >= 0 {
   656					return true
   657				}
   658				// We lost a race
   659				atomic.Xaddint64(ptr, +1)
   660			}
   661			return false
   662		}
   663	
   664		if decIfPositive(&c.dedicatedMarkWorkersNeeded) {
   665			// This P is now dedicated to marking until the end of
   666			// the concurrent mark phase.
   667			_p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
   668			// TODO(austin): This P isn't going to run anything
   669			// else for a while, so kick everything out of its run
   670			// queue.
   671		} else {
   672			if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
   673				// No more workers are needed right now.
   674				return nil
   675			}
   676	
   677			// This P has picked the token for the fractional worker.
   678			// Is the GC currently under or at the utilization goal?
   679			// If so, do more work.
   680			//
   681			// We used to check whether doing one time slice of work
   682			// would remain under the utilization goal, but that has the
   683			// effect of delaying work until the mutator has run for
   684			// enough time slices to pay for the work. During those time
   685			// slices, write barriers are enabled, so the mutator is running slower.
   686			// Now instead we do the work whenever we're under or at the
   687			// utilization goal and pay for it by letting the mutator run later.
   688			// This doesn't change the overall utilization averages, but it
   689			// front loads the GC work so that the GC finishes earlier and
   690			// write barriers can be turned off sooner, effectively giving
   691			// the mutator a faster machine.
   692			//
   693			// The old, slower behavior can be restored by setting
   694			//	gcForcePreemptNS = forcePreemptNS.
   695			const gcForcePreemptNS = 0
   696	
   697			// TODO(austin): We could fast path this and basically
   698			// eliminate contention on c.fractionalMarkWorkersNeeded by
   699			// precomputing the minimum time at which it's worth
   700			// next scheduling the fractional worker. Then Ps
   701			// don't have to fight in the window where we've
   702			// passed that deadline and no one has started the
   703			// worker yet.
   704			//
   705			// TODO(austin): Shorter preemption interval for mark
   706			// worker to improve fairness and give this
   707			// finer-grained control over schedule?
   708			now := nanotime() - gcController.markStartTime
   709			then := now + gcForcePreemptNS
   710			timeUsed := c.fractionalMarkTime + gcForcePreemptNS
   711			if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal {
   712				// Nope, we'd overshoot the utilization goal
   713				atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1)
   714				return nil
   715			}
   716			_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
   717		}
   718	
   719		// Run the background mark worker
   720		gp := _p_.gcBgMarkWorker.ptr()
   721		casgstatus(gp, _Gwaiting, _Grunnable)
   722		if trace.enabled {
   723			traceGoUnpark(gp, 0)
   724		}
   725		return gp
   726	}
   727	
   728	// gcGoalUtilization is the goal CPU utilization for background
   729	// marking as a fraction of GOMAXPROCS.
   730	const gcGoalUtilization = 0.25
   731	
   732	// gcCreditSlack is the amount of scan work credit that can
   733	// accumulate locally before updating gcController.scanWork and,
   734	// optionally, gcController.bgScanCredit. Lower values give a more
   735	// accurate assist ratio and make it more likely that assists will
   736	// successfully steal background credit. Higher values reduce memory
   737	// contention.
   738	const gcCreditSlack = 2000
   739	
   740	// gcAssistTimeSlack is the nanoseconds of mutator assist time that
   741	// can accumulate on a P before updating gcController.assistTime.
   742	const gcAssistTimeSlack = 5000
   743	
   744	// gcOverAssistWork determines how many extra units of scan work a GC
   745	// assist does when an assist happens. This amortizes the cost of an
   746	// assist by pre-paying for this many bytes of future allocations.
   747	const gcOverAssistWork = 64 << 10
   748	
   749	var work struct {
   750		full  uint64                   // lock-free list of full blocks workbuf
   751		empty uint64                   // lock-free list of empty blocks workbuf
   752		pad0  [sys.CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
   753	
   754		markrootNext uint32 // next markroot job
   755		markrootJobs uint32 // number of markroot jobs
   756	
   757		nproc   uint32
   758		tstart  int64
   759		nwait   uint32
   760		ndone   uint32
   761		alldone note
   762	
   763		// Number of roots of various root types. Set by gcMarkRootPrepare.
   764		nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nRescanRoots int
   765	
   766		// markrootDone indicates that roots have been marked at least
   767		// once during the current GC cycle. This is checked by root
   768		// marking operations that have to happen only during the
   769		// first root marking pass, whether that's during the
   770		// concurrent mark phase in current GC or mark termination in
   771		// STW GC.
   772		markrootDone bool
   773	
   774		// Each type of GC state transition is protected by a lock.
   775		// Since multiple threads can simultaneously detect the state
   776		// transition condition, any thread that detects a transition
   777		// condition must acquire the appropriate transition lock,
   778		// re-check the transition condition and return if it no
   779		// longer holds or perform the transition if it does.
   780		// Likewise, any transition must invalidate the transition
   781		// condition before releasing the lock. This ensures that each
   782		// transition is performed by exactly one thread and threads
   783		// that need the transition to happen block until it has
   784		// happened.
   785		//
   786		// startSema protects the transition from "off" to mark or
   787		// mark termination.
   788		startSema uint32
   789		// markDoneSema protects transitions from mark 1 to mark 2 and
   790		// from mark 2 to mark termination.
   791		markDoneSema uint32
   792	
   793		// Background mark completion signaling.
   794		bgMarkReady note   // signal background mark worker has started
   795		bgMarkDone  uint32 // cas to 1 when at a background mark completion point
   796	
   797		// mode is the concurrency mode of the current GC cycle.
   798		mode gcMode
   799	
   800		// Copy of mheap.allspans for marker or sweeper.
   801		spans []*mspan
   802	
   803		// totaltime is the CPU nanoseconds spent in GC since the
   804		// program started if debug.gctrace > 0.
   805		totaltime int64
   806	
   807		// bytesMarked is the number of bytes marked this cycle. This
   808		// includes bytes blackened in scanned objects, noscan objects
   809		// that go straight to black, and permagrey objects scanned by
   810		// markroot during the concurrent scan phase. This is updated
   811		// atomically during the cycle. Updates may be batched
   812		// arbitrarily, since the value is only read at the end of the
   813		// cycle.
   814		//
   815		// Because of benign races during marking, this number may not
   816		// be the exact number of marked bytes, but it should be very
   817		// close.
   818		bytesMarked uint64
   819	
   820		// initialHeapLive is the value of memstats.heap_live at the
   821		// beginning of this GC cycle.
   822		initialHeapLive uint64
   823	
   824		// assistQueue is a queue of assists that are blocked because
   825		// there was neither enough credit to steal nor enough work to
   826		// do.
   827		assistQueue struct {
   828			lock       mutex
   829			head, tail guintptr
   830		}
   831	
   832		// rescan is a list of G's that need to be rescanned during
   833		// mark termination. A G adds itself to this list when it
   834		// first invalidates its stack scan.
   835		rescan struct {
   836			lock mutex
   837			list []guintptr
   838		}
   839	
   840		// Timing/utilization stats for this cycle.
   841		stwprocs, maxprocs                 int32
   842		tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start
   843	
   844		pauseNS    int64 // total STW time this cycle
   845		pauseStart int64 // nanotime() of last STW
   846	
   847		// debug.gctrace heap sizes for this cycle.
   848		heap0, heap1, heap2, heapGoal uint64
   849	}
   850	
   851	// GC runs a garbage collection and blocks the caller until the
   852	// garbage collection is complete. It may also block the entire
   853	// program.
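	//
	// As a caller-side sketch only (public runtime and runtime/debug APIs,
	// nothing from this file):
	//
	//	old := debug.SetGCPercent(50) // collect once the heap grows 50% over the live set
	//	runtime.GC()                  // force a blocking collection
	//	debug.SetGCPercent(old)       // restore the previous pacing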
   854	func GC() {
   855		gcStart(gcForceBlockMode, false)
   856	}
   857	
   858	// gcMode indicates how concurrent a GC cycle should be.
   859	type gcMode int
   860	
   861	const (
   862		gcBackgroundMode gcMode = iota // concurrent GC and sweep
   863		gcForceMode                    // stop-the-world GC now, concurrent sweep
   864		gcForceBlockMode               // stop-the-world GC now and STW sweep
   865	)
   866	
   867	// gcShouldStart returns true if the exit condition for the _GCoff
   868	// phase has been met. The exit condition should be tested when
   869	// allocating.
   870	//
   871	// If forceTrigger is true, it ignores the current heap size, but
   872	// checks all other conditions. In general this should be false.
   873	func gcShouldStart(forceTrigger bool) bool {
   874		return gcphase == _GCoff && (forceTrigger || memstats.heap_live >= memstats.next_gc) && memstats.enablegc && panicking == 0 && gcpercent >= 0
   875	}
   876	
   877	// gcStart transitions the GC from _GCoff to _GCmark (if mode ==
   878	// gcBackgroundMode) or _GCmarktermination (if mode !=
   879	// gcBackgroundMode) by performing sweep termination and GC
   880	// initialization.
   881	//
   882	// This may return without performing this transition in some cases,
   883	// such as when called on a system stack or with locks held.
   884	func gcStart(mode gcMode, forceTrigger bool) {
   885		// Since this is called from malloc and malloc is called in
   886		// the guts of a number of libraries that might be holding
   887		// locks, don't attempt to start GC in non-preemptible or
   888		// potentially unstable situations.
   889		mp := acquirem()
   890		if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" {
   891			releasem(mp)
   892			return
   893		}
   894		releasem(mp)
   895		mp = nil
   896	
   897		// Pick up the remaining unswept/not being swept spans concurrently
   898		//
   899		// This shouldn't happen if we're being invoked in background
   900		// mode since proportional sweep should have just finished
   901		// sweeping everything, but rounding errors, etc, may leave a
   902		// few spans unswept. In forced mode, this is necessary since
   903		// GC can be forced at any point in the sweeping cycle.
   904		//
   905		// We check the transition condition continuously here in case
   906		// this G gets delayed into the next GC cycle.
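		// (gosweepone reports ^uintptr(0) once there are no spans left to
		// sweep.)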
   907		for (mode != gcBackgroundMode || gcShouldStart(forceTrigger)) && gosweepone() != ^uintptr(0) {
   908			sweep.nbgsweep++
   909		}
   910	
   911		// Perform GC initialization and the sweep termination
   912		// transition.
   913		//
   914		// If this is a forced GC, don't acquire the transition lock
   915		// or re-check the transition condition because we
   916		// specifically *don't* want to share the transition with
   917		// another thread.
   918		useStartSema := mode == gcBackgroundMode
   919		if useStartSema {
   920			semacquire(&work.startSema, false)
   921			// Re-check transition condition under transition lock.
   922			if !gcShouldStart(forceTrigger) {
   923				semrelease(&work.startSema)
   924				return
   925			}
   926		}
   927	
   928		// In gcstoptheworld debug mode, upgrade the mode accordingly.
   929		// We do this after re-checking the transition condition so
   930		// that multiple goroutines that detect the heap trigger don't
   931		// start multiple STW GCs.
   932		if mode == gcBackgroundMode {
   933			if debug.gcstoptheworld == 1 {
   934				mode = gcForceMode
   935			} else if debug.gcstoptheworld == 2 {
   936				mode = gcForceBlockMode
   937			}
   938		}
   939	
   940		// Ok, we're doing it!  Stop everybody else
   941		semacquire(&worldsema, false)
   942	
   943		if trace.enabled {
   944			traceGCStart()
   945		}
   946	
   947		if mode == gcBackgroundMode {
   948			gcBgMarkStartWorkers()
   949		}
   950	
   951		gcResetMarkState()
   952	
   953		now := nanotime()
   954		work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs
   955		work.tSweepTerm = now
   956		work.heap0 = memstats.heap_live
   957		work.pauseNS = 0
   958		work.mode = mode
   959	
   960		work.pauseStart = now
   961		systemstack(stopTheWorldWithSema)
   962		// Finish sweep before we start concurrent scan.
   963		systemstack(func() {
   964			finishsweep_m(true)
   965		})
   966		// clearpools before we start the GC. If we wait, the memory will not be
   967		// reclaimed until the next GC cycle.
   968		clearpools()
   969	
   970		if mode == gcBackgroundMode { // Do as much work concurrently as possible
   971			gcController.startCycle()
   972			work.heapGoal = gcController.heapGoal
   973	
   974			// Enter concurrent mark phase and enable
   975			// write barriers.
   976			//
   977			// Because the world is stopped, all Ps will
   978			// observe that write barriers are enabled by
   979			// the time we start the world and begin
   980			// scanning.
   981			//
   982			// It's necessary to enable write barriers
   983			// during the scan phase for several reasons:
   984			//
   985			// They must be enabled for writes to higher
   986			// stack frames before we scan stacks and
   987			// install stack barriers because this is how
   988			// we track writes to inactive stack frames.
   989			// (Alternatively, we could not install stack
   990			// barriers over frame boundaries with
   991			// up-pointers).
   992			//
   993			// They must be enabled before assists are
   994			// enabled because they must be enabled before
   995			// any non-leaf heap objects are marked. Since
   996			// allocations are blocked until assists can
   997			// happen, we want to enable assists as early as
   998			// possible.
   999			setGCPhase(_GCmark)
  1000	
  1001			// markrootSpans uses work.spans, so make sure
  1002			// it is up to date.
  1003			gcCopySpans()
  1004	
  1005			gcBgMarkPrepare() // Must happen before assist enable.
  1006			gcMarkRootPrepare()
  1007	
  1008			// At this point all Ps have enabled the write
  1009			// barrier, thus maintaining the no white to
  1010			// black invariant. Enable mutator assists to
  1011			// put back-pressure on fast allocating
  1012			// mutators.
  1013			atomic.Store(&gcBlackenEnabled, 1)
  1014	
  1015			// Assists and workers can start the moment we start
  1016			// the world.
  1017			gcController.markStartTime = now
  1018	
  1019			// Concurrent mark.
  1020			systemstack(startTheWorldWithSema)
  1021			now = nanotime()
  1022			work.pauseNS += now - work.pauseStart
  1023			work.tMark = now
  1024		} else {
  1025			t := nanotime()
  1026			work.tMark, work.tMarkTerm = t, t
  1027			work.heapGoal = work.heap0
  1028	
  1029			// Perform mark termination. This will restart the world.
  1030			gcMarkTermination()
  1031		}
  1032	
  1033		if useStartSema {
  1034			semrelease(&work.startSema)
  1035		}
  1036	}
  1037	
  1038	// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2
  1039	// to mark termination.
  1040	//
  1041	// This should be called when all mark work has been drained. In mark
  1042	// 1, this includes all root marking jobs, global work buffers, and
  1043	// active work buffers in assists and background workers; however,
  1044	// work may still be cached in per-P work buffers. In mark 2, per-P
  1045	// caches are disabled.
  1046	//
  1047	// The calling context must be preemptible.
  1048	//
  1049	// Note that it is explicitly okay to have write barriers in this
  1050	// function because completion of concurrent mark is best-effort
  1051	// anyway. Any work created by write barriers here will be cleaned up
  1052	// by mark termination.
  1053	func gcMarkDone() {
  1054	top:
  1055		semacquire(&work.markDoneSema, false)
  1056	
  1057		// Re-check transition condition under transition lock.
  1058		if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) {
  1059			semrelease(&work.markDoneSema)
  1060			return
  1061		}
  1062	
  1063		// Disallow starting new workers so that any remaining workers
  1064		// in the current mark phase will drain out.
  1065		//
  1066		// TODO(austin): Should dedicated workers keep an eye on this
  1067		// and exit gcDrain promptly?
  1068		atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff)
  1069		atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff)
  1070	
  1071		if !gcBlackenPromptly {
  1072			// Transition from mark 1 to mark 2.
  1073			//
  1074			// The global work list is empty, but there can still be work
  1075			// sitting in the per-P work caches and there can be more
  1076			// objects reachable from global roots since they don't have write
  1077			// barriers. Rescan some roots and flush work caches.
  1078	
  1079			gcMarkRootCheck()
  1080	
  1081			// Disallow caching workbufs and indicate that we're in mark 2.
  1082			gcBlackenPromptly = true
  1083	
  1084			// Prevent completion of mark 2 until we've flushed
  1085			// cached workbufs.
  1086			atomic.Xadd(&work.nwait, -1)
  1087	
  1088			// GC is set up for mark 2. Let Gs blocked on the
  1089			// transition lock go while we flush caches.
  1090			semrelease(&work.markDoneSema)
  1091	
  1092			systemstack(func() {
  1093				// Flush all currently cached workbufs and
  1094				// ensure all Ps see gcBlackenPromptly. This
  1095				// also blocks until any remaining mark 1
  1096				// workers have exited their loop so we can
  1097				// start new mark 2 workers that will observe
  1098				// the new root marking jobs.
  1099				forEachP(func(_p_ *p) {
  1100					_p_.gcw.dispose()
  1101				})
  1102			})
  1103	
  1104			// Now we can start up mark 2 workers.
  1105			atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff)
  1106			atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff)
  1107	
  1108			incnwait := atomic.Xadd(&work.nwait, +1)
  1109			if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
  1110				// This loop will make progress because
  1111				// gcBlackenPromptly is now true, so it won't
  1112				// take this same "if" branch.
  1113				goto top
  1114			}
  1115		} else {
  1116			// Transition to mark termination.
  1117			now := nanotime()
  1118			work.tMarkTerm = now
  1119			work.pauseStart = now
  1120			getg().m.preemptoff = "gcing"
  1121			systemstack(stopTheWorldWithSema)
  1122		// The gcphase is _GCmark; it will transition to _GCmarktermination
  1123			// below. The important thing is that the wb remains active until
  1124			// all marking is complete. This includes writes made by the GC.
  1125	
  1126			// Record that one root marking pass has completed.
  1127			work.markrootDone = true
  1128	
  1129			// Disable assists and background workers. We must do
  1130			// this before waking blocked assists.
  1131			atomic.Store(&gcBlackenEnabled, 0)
  1132	
  1133			// Flush the gcWork caches. This must be done before
  1134			// endCycle since endCycle depends on statistics kept
  1135			// in these caches.
  1136			gcFlushGCWork()
  1137	
  1138			// Wake all blocked assists. These will run when we
  1139			// start the world again.
  1140			gcWakeAllAssists()
  1141	
  1142			// Likewise, release the transition lock. Blocked
  1143			// workers and assists will run when we start the
  1144			// world again.
  1145			semrelease(&work.markDoneSema)
  1146	
  1147			gcController.endCycle()
  1148	
  1149			// Perform mark termination. This will restart the world.
  1150			gcMarkTermination()
  1151		}
  1152	}
  1153	
  1154	func gcMarkTermination() {
  1155		// World is stopped.
  1156		// Start marktermination which includes enabling the write barrier.
  1157		atomic.Store(&gcBlackenEnabled, 0)
  1158		gcBlackenPromptly = false
  1159		setGCPhase(_GCmarktermination)
  1160	
  1161		work.heap1 = memstats.heap_live
  1162		startTime := nanotime()
  1163	
  1164		mp := acquirem()
  1165		mp.preemptoff = "gcing"
  1166		_g_ := getg()
  1167		_g_.m.traceback = 2
  1168		gp := _g_.m.curg
  1169		casgstatus(gp, _Grunning, _Gwaiting)
  1170		gp.waitreason = "garbage collection"
  1171	
  1172		// Run gc on the g0 stack. We do this so that the g stack
  1173		// we're currently running on will no longer change. Cuts
  1174		// the root set down a bit (g0 stacks are not scanned, and
  1175		// we don't need to scan gc's internal state).  We also
  1176		// need to switch to g0 so we can shrink the stack.
  1177		systemstack(func() {
  1178			gcMark(startTime)
  1179			// Must return immediately.
  1180			// The outer function's stack may have moved
  1181			// during gcMark (it shrinks stacks, including the
  1182			// outer function's stack), so we must not refer
  1183			// to any of its variables. Return back to the
  1184			// non-system stack to pick up the new addresses
  1185			// before continuing.
  1186		})
  1187	
  1188		systemstack(func() {
  1189			work.heap2 = work.bytesMarked
  1190			if debug.gccheckmark > 0 {
  1191				// Run a full stop-the-world mark using checkmark bits,
  1192				// to check that we didn't forget to mark anything during
  1193				// the concurrent mark process.
  1194				gcResetMarkState()
  1195				initCheckmarks()
  1196				gcMark(startTime)
  1197				clearCheckmarks()
  1198			}
  1199	
  1200			// marking is complete so we can turn the write barrier off
  1201			setGCPhase(_GCoff)
  1202			gcSweep(work.mode)
  1203	
  1204			if debug.gctrace > 1 {
  1205				startTime = nanotime()
  1206				// The g stacks have been scanned so
  1207				// they have gcscanvalid==true and gcworkdone==true.
  1208				// Reset these so that all stacks will be rescanned.
  1209				gcResetMarkState()
  1210				finishsweep_m(true)
  1211	
  1212				// Still in STW but gcphase is _GCoff, reset to _GCmarktermination
  1213				// At this point all objects will be found during the gcMark which
  1214				// does a complete STW mark and object scan.
  1215				setGCPhase(_GCmarktermination)
  1216				gcMark(startTime)
  1217				setGCPhase(_GCoff) // marking is done, turn off wb.
  1218				gcSweep(work.mode)
  1219			}
  1220		})
  1221	
  1222		_g_.m.traceback = 0
  1223		casgstatus(gp, _Gwaiting, _Grunning)
  1224	
  1225		if trace.enabled {
  1226			traceGCDone()
  1227		}
  1228	
  1229		// all done
  1230		mp.preemptoff = ""
  1231	
  1232		if gcphase != _GCoff {
  1233			throw("gc done but gcphase != _GCoff")
  1234		}
  1235	
  1236		// Update timing memstats
  1237		now, unixNow := nanotime(), unixnanotime()
  1238		work.pauseNS += now - work.pauseStart
  1239		work.tEnd = now
  1240		atomic.Store64(&memstats.last_gc, uint64(unixNow)) // must be Unix time to make sense to user
  1241		memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS)
  1242		memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow)
  1243		memstats.pause_total_ns += uint64(work.pauseNS)
  1244	
  1245		// Update work.totaltime.
  1246		sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm)
  1247		// We report idle marking time below, but omit it from the
  1248		// overall utilization here since it's "free".
  1249		markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime
  1250		markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm)
  1251		cycleCpu := sweepTermCpu + markCpu + markTermCpu
  1252		work.totaltime += cycleCpu
  1253	
  1254		// Compute overall GC CPU utilization.
  1255		totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs)
  1256		memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu)
  1257	
  1258		memstats.numgc++
  1259	
  1260		// Reset sweep state.
  1261		sweep.nbgsweep = 0
  1262		sweep.npausesweep = 0
  1263	
  1264		systemstack(startTheWorldWithSema)
  1265	
  1266		// Free stack spans. This must be done between GC cycles.
  1267		systemstack(freeStackSpans)
  1268	
  1269		// Best-effort remove stack barriers so they don't get in the
  1270		// way of things like GDB and perf.
  1271		lock(&allglock)
  1272		myallgs := allgs
  1273		unlock(&allglock)
  1274		gcTryRemoveAllStackBarriers(myallgs)
  1275	
  1276		// Print gctrace before dropping worldsema. As soon as we drop
  1277		// worldsema another cycle could start and smash the stats
  1278		// we're trying to print.
  1279		if debug.gctrace > 0 {
  1280			util := int(memstats.gc_cpu_fraction * 100)
  1281	
  1282			var sbuf [24]byte
  1283			printlock()
  1284			print("gc ", memstats.numgc,
  1285				" @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ",
  1286				util, "%: ")
  1287			prev := work.tSweepTerm
  1288			for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} {
  1289				if i != 0 {
  1290					print("+")
  1291				}
  1292				print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev))))
  1293				prev = ns
  1294			}
  1295			print(" ms clock, ")
  1296			for i, ns := range []int64{sweepTermCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} {
  1297				if i == 2 || i == 3 {
  1298					// Separate mark time components with /.
  1299					print("/")
  1300				} else if i != 0 {
  1301					print("+")
  1302				}
  1303				print(string(fmtNSAsMS(sbuf[:], uint64(ns))))
  1304			}
  1305			print(" ms cpu, ",
  1306				work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ",
  1307				work.heapGoal>>20, " MB goal, ",
  1308				work.maxprocs, " P")
  1309			if work.mode != gcBackgroundMode {
  1310				print(" (forced)")
  1311			}
  1312			print("\n")
  1313			printunlock()
  1314		}
  1315	
  1316		semrelease(&worldsema)
  1317		// Careful: another GC cycle may start now.
  1318	
  1319		releasem(mp)
  1320		mp = nil
  1321	
  1322		// now that gc is done, kick off finalizer thread if needed
  1323		if !concurrentSweep {
  1324			// give the queued finalizers, if any, a chance to run
  1325			Gosched()
  1326		}
  1327	}
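
// Editorial sketch (not part of the original source): the utilization numbers
// above charge each STW phase at stwprocs*wallClock, take concurrent mark time
// from the assist/dedicated/fractional timers, and divide by the total CPU
// time available to the process. A hypothetical worked example with invented
// numbers for a 4-P machine, all values in nanoseconds:
//
//	const stwprocs = 4
//	sweepTermCpu := int64(stwprocs * 200e3) // 0.2ms of STW sweep termination
//	markCpu := int64(3e6 + 1e6 + 5e5)       // assist + dedicated + fractional mark
//	markTermCpu := int64(stwprocs * 300e3)  // 0.3ms of STW mark termination
//	cycleCpu := sweepTermCpu + markCpu + markTermCpu
//
//	totalCpu := int64(4 * 1e9) // 4 Ps over a 1-second window
//	fraction := float64(cycleCpu) / float64(totalCpu)
//	// fraction ≈ 0.0016, i.e. roughly 0.2% of the available CPU went to this
//	// cycle (gc_cpu_fraction accumulates this over all cycles).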
  1328	
  1329	// gcBgMarkStartWorkers prepares background mark worker goroutines.
  1330	// These goroutines will not run until the mark phase, but they must
  1331	// be started while the world is not stopped and from a regular G
  1332	// stack. The caller must hold worldsema.
  1333	func gcBgMarkStartWorkers() {
  1334		// Background marking is performed by per-P G's. Ensure that
  1335		// each P has a background GC G.
  1336		for _, p := range &allp {
  1337			if p == nil || p.status == _Pdead {
  1338				break
  1339			}
  1340			if p.gcBgMarkWorker == 0 {
  1341				go gcBgMarkWorker(p)
  1342				notetsleepg(&work.bgMarkReady, -1)
  1343				noteclear(&work.bgMarkReady)
  1344			}
  1345		}
  1346	}
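
// Editorial sketch (not part of the original source): gcBgMarkStartWorkers
// starts one worker per P and then blocks on work.bgMarkReady until that
// worker has announced itself, so only one worker is ever "in flight" at a
// time. A hypothetical version of the same start-then-wait handshake using an
// ordinary channel in place of the runtime note (perP, workerState,
// registerWorker and runWorker are invented names):
//
//	ready := make(chan struct{})
//	for _, p := range perP {
//		go func(p *workerState) {
//			registerWorker(p)   // plays the role of setting p.gcBgMarkWorker
//			ready <- struct{}{} // plays the role of notewakeup(&work.bgMarkReady)
//			runWorker(p)
//		}(p)
//		<-ready // plays the role of notetsleepg(&work.bgMarkReady, -1) + noteclear
//	}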
  1347	
  1348	// gcBgMarkPrepare sets up state for background marking.
  1349	// Mutator assists must not yet be enabled.
  1350	func gcBgMarkPrepare() {
  1351		// Background marking will stop when the work queues are empty
  1352		// and there are no more workers (note that, since this is
  1353		// concurrent, this may be a transient state, but mark
  1354		// termination will clean it up). Between background workers
  1355		// and assists, we don't really know how many workers there
  1356		// will be, so we pretend to have an arbitrarily large number
  1357		// of workers, almost all of which are "waiting". While a
  1358		// worker is working it decrements nwait. If nproc == nwait,
  1359		// there are no workers.
  1360		work.nproc = ^uint32(0)
  1361		work.nwait = ^uint32(0)
  1362	}
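
// Editorial sketch (not part of the original source): the counters above can
// be read as "pretend there are 2^32-1 workers, all of them waiting". A worker
// decrements nwait while it is draining and increments it again when it stops,
// so nwait == nproc means no worker is currently running, without ever having
// to know how many workers (background or assist) actually exist. A
// hypothetical stand-alone version of the same bookkeeping with sync/atomic:
//
//	var nproc = ^uint32(0) // pretend-infinite worker population
//	var nwait = ^uint32(0) // all of them start out waiting
//
//	func beginWork() { atomic.AddUint32(&nwait, ^uint32(0)) } // nwait--
//
//	// endWork reports whether this was the last active worker.
//	func endWork() bool {
//		return atomic.AddUint32(&nwait, 1) == atomic.LoadUint32(&nproc)
//	}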
  1363	
  1364	func gcBgMarkWorker(_p_ *p) {
  1365		gp := getg()
  1366	
  1367		type parkInfo struct {
  1368			m      muintptr // Release this m on park.
  1369			attach puintptr // If non-nil, attach to this p on park.
  1370		}
  1371		// We pass park to a gopark unlock function, so it can't be on
  1372		// the stack (see gopark). Prevent deadlock from recursively
  1373		// starting GC by disabling preemption.
  1374		gp.m.preemptoff = "GC worker init"
  1375		park := new(parkInfo)
  1376		gp.m.preemptoff = ""
  1377	
  1378		park.m.set(acquirem())
  1379		park.attach.set(_p_)
  1380		// Inform gcBgMarkStartWorkers that this worker is ready.
  1381		// After this point, the background mark worker is scheduled
  1382		// cooperatively by gcController.findRunnable. Hence, it must
  1383		// never be preempted, as this would put it into _Grunnable
  1384		// and put it on a run queue. Instead, when the preempt flag
  1385	// is set, the worker puts itself into _Gwaiting to be woken up by
  1386		// gcController.findRunnable at the appropriate time.
  1387		notewakeup(&work.bgMarkReady)
  1388	
  1389		for {
  1390			// Go to sleep until woken by gcController.findRunnable.
  1391			// We can't releasem yet since even the call to gopark
  1392			// may be preempted.
  1393			gopark(func(g *g, parkp unsafe.Pointer) bool {
  1394				park := (*parkInfo)(parkp)
  1395	
  1396				// The worker G is no longer running, so it's
  1397				// now safe to allow preemption.
  1398				releasem(park.m.ptr())
  1399	
  1400				// If the worker isn't attached to its P,
  1401				// attach now. During initialization and after
  1402				// a phase change, the worker may have been
  1403				// running on a different P. As soon as we
  1404				// attach, the owner P may schedule the
  1405				// worker, so this must be done after the G is
  1406				// stopped.
  1407				if park.attach != 0 {
  1408					p := park.attach.ptr()
  1409					park.attach.set(nil)
  1410					// cas the worker because we may be
  1411					// racing with a new worker starting
  1412					// on this P.
  1413					if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) {
  1414						// The P got a new worker.
  1415						// Exit this worker.
  1416						return false
  1417					}
  1418				}
  1419				return true
  1420			}, unsafe.Pointer(park), "GC worker (idle)", traceEvGoBlock, 0)
  1421	
  1422			// Loop until the P dies and disassociates this
  1423			// worker (the P may later be reused, in which case
  1424			// it will get a new worker) or we failed to associate.
  1425			if _p_.gcBgMarkWorker.ptr() != gp {
  1426				break
  1427			}
  1428	
  1429			// Disable preemption so we can use the gcw. If the
  1430			// scheduler wants to preempt us, we'll stop draining,
  1431			// dispose the gcw, and then preempt.
  1432			park.m.set(acquirem())
  1433	
  1434			if gcBlackenEnabled == 0 {
  1435				throw("gcBgMarkWorker: blackening not enabled")
  1436			}
  1437	
  1438			startTime := nanotime()
  1439	
  1440			decnwait := atomic.Xadd(&work.nwait, -1)
  1441			if decnwait == work.nproc {
  1442				println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
  1443				throw("work.nwait was > work.nproc")
  1444			}
  1445	
  1446			switch _p_.gcMarkWorkerMode {
  1447			default:
  1448				throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
  1449			case gcMarkWorkerDedicatedMode:
  1450				gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
  1451			case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
  1452				gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
  1453			}
  1454	
  1455			// If we are nearing the end of mark, dispose
  1456			// of the cache promptly. We must do this
  1457			// before signaling that we're no longer
  1458			// working so that other workers can't observe
  1459			// no workers and no work while we have this
  1460			// cached, and before we compute done.
  1461			if gcBlackenPromptly {
  1462				_p_.gcw.dispose()
  1463			}
  1464	
  1465			// Account for time.
  1466			duration := nanotime() - startTime
  1467			switch _p_.gcMarkWorkerMode {
  1468			case gcMarkWorkerDedicatedMode:
  1469				atomic.Xaddint64(&gcController.dedicatedMarkTime, duration)
  1470				atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
  1471			case gcMarkWorkerFractionalMode:
  1472				atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
  1473				atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1)
  1474			case gcMarkWorkerIdleMode:
  1475				atomic.Xaddint64(&gcController.idleMarkTime, duration)
  1476			}
  1477	
  1478			// Was this the last worker and did we run out
  1479			// of work?
  1480			incnwait := atomic.Xadd(&work.nwait, +1)
  1481			if incnwait > work.nproc {
  1482				println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode,
  1483					"work.nwait=", incnwait, "work.nproc=", work.nproc)
  1484				throw("work.nwait > work.nproc")
  1485			}
  1486	
  1487			// If this worker reached a background mark completion
  1488			// point, signal the main GC goroutine.
  1489			if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
  1490				// Make this G preemptible and disassociate it
  1491				// as the worker for this P so
  1492				// findRunnableGCWorker doesn't try to
  1493				// schedule it.
  1494				_p_.gcBgMarkWorker.set(nil)
  1495				releasem(park.m.ptr())
  1496	
  1497				gcMarkDone()
  1498	
  1499				// Disable preemption and prepare to reattach
  1500				// to the P.
  1501				//
  1502				// We may be running on a different P at this
  1503				// point, so we can't reattach until this G is
  1504				// parked.
  1505				park.m.set(acquirem())
  1506				park.attach.set(_p_)
  1507			}
  1508		}
  1509	}
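
// Editorial sketch (not part of the original source): the completion check in
// the loop above is a "last one out reports done" pattern. A worker that both
// brings nwait back up to nproc and finds no queued mark work is the one that
// calls gcMarkDone. Condensed, using the hypothetical beginWork/endWork
// helpers sketched after gcBgMarkPrepare and assumed drainSomeMarkWork,
// workAvailable and reportMarkDone helpers:
//
//	beginWork()
//	drainSomeMarkWork()
//	if endWork() && !workAvailable() {
//		// Every worker is idle and the queues look empty, so this phase of
//		// marking may be complete; hand off to the coordinator (the real
//		// code detaches from the P and calls gcMarkDone here).
//		reportMarkDone()
//	}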
  1510	
  1511	// gcMarkWorkAvailable returns true if executing a mark worker
  1512	// on p is potentially useful. p may be nil, in which case it only
  1513	// checks the global sources of work.
  1514	func gcMarkWorkAvailable(p *p) bool {
  1515		if p != nil && !p.gcw.empty() {
  1516			return true
  1517		}
  1518		if atomic.Load64(&work.full) != 0 {
  1519			return true // global work available
  1520		}
  1521		if work.markrootNext < work.markrootJobs {
  1522			return true // root scan work available
  1523		}
  1524		return false
  1525	}
  1526	
  1527	// gcFlushGCWork disposes the gcWork caches of all Ps. The world must
  1528	// be stopped.
  1529	//go:nowritebarrier
  1530	func gcFlushGCWork() {
  1531		// Gather all cached GC work. All other Ps are stopped, so
  1532		// it's safe to manipulate their GC work caches.
  1533		for i := 0; i < int(gomaxprocs); i++ {
  1534			allp[i].gcw.dispose()
  1535		}
  1536	}
  1537	
  1538	// gcMark runs the mark (or, for concurrent GC, the mark termination) phase.
  1539	// STW is in effect at this point.
  1540	//TODO go:nowritebarrier
  1541	func gcMark(start_time int64) {
  1542		if debug.allocfreetrace > 0 {
  1543			tracegc()
  1544		}
  1545	
  1546		if gcphase != _GCmarktermination {
  1547			throw("in gcMark expecting to see gcphase as _GCmarktermination")
  1548		}
  1549		work.tstart = start_time
  1550	
  1551		gcCopySpans() // TODO(rlh): should this be hoisted and done only once? Right now it is done for normal marking and also for checkmarking.
  1552	
  1553		// Make sure the per-P gcWork caches are empty. During mark
  1554		// termination, these caches can still be used temporarily,
  1555		// but must be disposed to the global lists immediately.
  1556		gcFlushGCWork()
  1557	
  1558		// Queue root marking jobs.
  1559		gcMarkRootPrepare()
  1560	
  1561		work.nwait = 0
  1562		work.ndone = 0
  1563		work.nproc = uint32(gcprocs())
  1564	
  1565		if trace.enabled {
  1566			traceGCScanStart()
  1567		}
  1568	
  1569		if work.nproc > 1 {
  1570			noteclear(&work.alldone)
  1571			helpgc(int32(work.nproc))
  1572		}
  1573	
  1574		gchelperstart()
  1575	
  1576		gcw := &getg().m.p.ptr().gcw
  1577		gcDrain(gcw, gcDrainBlock)
  1578		gcw.dispose()
  1579	
  1580		if debug.gccheckmark > 0 {
  1581			// This is expensive when there's a large number of
  1582			// Gs, so only do it if checkmark is also enabled.
  1583			gcMarkRootCheck()
  1584		}
  1585		if work.full != 0 {
  1586			throw("work.full != 0")
  1587		}
  1588	
  1589		if work.nproc > 1 {
  1590			notesleep(&work.alldone)
  1591		}
  1592	
  1593		// Record that at least one root marking pass has completed.
  1594		work.markrootDone = true
  1595	
  1596		for i := 0; i < int(gomaxprocs); i++ {
  1597			gcw := &allp[i].gcw
  1598			if !gcw.empty() {
  1599				throw("P has cached GC work at end of mark termination")
  1600			}
  1601			if gcw.scanWork != 0 || gcw.bytesMarked != 0 {
  1602				throw("P has unflushed stats at end of mark termination")
  1603			}
  1604		}
  1605	
  1606		if trace.enabled {
  1607			traceGCScanDone()
  1608		}
  1609	
  1610		cachestats()
  1611	
  1612		// Update the reachable heap stat.
  1613		memstats.heap_reachable = work.bytesMarked
  1614	
  1615		// Trigger the next GC cycle when the allocated heap has grown
  1616		// by triggerRatio over the reachable heap size. Assume that
  1617		// we're in steady state, so the reachable heap size is the
  1618		// same now as it was at the beginning of the GC cycle.
  1619		memstats.next_gc = uint64(float64(memstats.heap_reachable) * (1 + gcController.triggerRatio))
  1620		if memstats.next_gc < heapminimum {
  1621			memstats.next_gc = heapminimum
  1622		}
  1623		if int64(memstats.next_gc) < 0 {
  1624			print("next_gc=", memstats.next_gc, " bytesMarked=", work.bytesMarked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "\n")
  1625			throw("next_gc underflow")
  1626		}
  1627	
  1628		// Update other GC heap size stats. This must happen after
  1629		// cachestats (which flushes local statistics to these) and
  1630		// flushallmcaches (which modifies heap_live).
  1631		memstats.heap_live = work.bytesMarked
  1632		memstats.heap_marked = work.bytesMarked
  1633		memstats.heap_scan = uint64(gcController.scanWork)
  1634	
  1635		minNextGC := memstats.heap_live + sweepMinHeapDistance*uint64(gcpercent)/100
  1636		if memstats.next_gc < minNextGC {
  1637			// The allocated heap is already past the trigger.
  1638			// This can happen if the triggerRatio is very low and
  1639			// the reachable heap estimate is less than the live
  1640			// heap size.
  1641			//
  1642			// Concurrent sweep happens in the heap growth from
  1643			// heap_live to next_gc, so bump next_gc up to ensure
  1644			// that concurrent sweep has some heap growth in which
  1645			// to perform sweeping before we start the next GC
  1646			// cycle.
  1647			memstats.next_gc = minNextGC
  1648		}
  1649	
  1650		if trace.enabled {
  1651			traceHeapAlloc()
  1652			traceNextGC()
  1653		}
  1654	}
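
// Editorial sketch (not part of the original source): a worked example of the
// trigger arithmetic above, using invented numbers. Assume 60 MB of reachable
// heap, triggerRatio = 7/8, a 4 MB heapminimum, GOGC=100 and a 1 MB
// sweepMinHeapDistance:
//
//	heapReachable := uint64(60 << 20)
//	nextGC := uint64(float64(heapReachable) * (1 + 0.875)) // 112.5 MB
//	if nextGC < 4<<20 {                                    // heapminimum clamp
//		nextGC = 4 << 20
//	}
//	heapLive := heapReachable               // steady-state assumption
//	minNextGC := heapLive + (1<<20)*100/100 // sweepMinHeapDistance*gcpercent/100
//	if nextGC < minNextGC {
//		nextGC = minNextGC // only bites when the trigger ratio is very low
//	}
//	// Result: the next cycle is triggered once the allocated heap reaches
//	// roughly 112.5 MB.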
  1655	
  1656	func gcSweep(mode gcMode) {
  1657		if gcphase != _GCoff {
  1658			throw("gcSweep being done but phase is not GCoff")
  1659		}
  1660		gcCopySpans()
  1661	
  1662		lock(&mheap_.lock)
  1663		mheap_.sweepgen += 2
  1664		mheap_.sweepdone = 0
  1665		sweep.spanidx = 0
  1666		unlock(&mheap_.lock)
  1667	
  1668		if !_ConcurrentSweep || mode == gcForceBlockMode {
  1669			// Special case synchronous sweep.
  1670			// Record that no proportional sweeping has to happen.
  1671			lock(&mheap_.lock)
  1672			mheap_.sweepPagesPerByte = 0
  1673			mheap_.pagesSwept = 0
  1674			unlock(&mheap_.lock)
  1675			// Sweep all spans eagerly.
  1676			for sweepone() != ^uintptr(0) {
  1677				sweep.npausesweep++
  1678			}
  1679			// Do an additional mProf_GC, because all 'free' events are now real as well.
  1680			mProf_GC()
  1681			mProf_GC()
  1682			return
  1683		}
  1684	
  1685		// Concurrent sweep needs to sweep all of the in-use pages by
  1686		// the time the allocated heap reaches the GC trigger. Compute
  1687		// the ratio of in-use pages to sweep per byte allocated.
  1688		heapDistance := int64(memstats.next_gc) - int64(memstats.heap_live)
  1689		// Add a little margin so rounding errors and concurrent
  1690		// sweep are less likely to leave pages unswept when GC starts.
  1691		heapDistance -= 1024 * 1024
  1692		if heapDistance < _PageSize {
  1693			// Avoid setting the sweep ratio extremely high
  1694			heapDistance = _PageSize
  1695		}
  1696		lock(&mheap_.lock)
  1697		mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance)
  1698		mheap_.pagesSwept = 0
  1699		mheap_.spanBytesAlloc = 0
  1700		unlock(&mheap_.lock)
  1701	
  1702		// Background sweep.
  1703		lock(&sweep.lock)
  1704		if sweep.parked {
  1705			sweep.parked = false
  1706			ready(sweep.g, 0, true)
  1707		}
  1708		unlock(&sweep.lock)
  1709		mProf_GC()
  1710	}
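
// Editorial sketch (not part of the original source): a worked example of the
// sweep pacing above, using invented numbers. Suppose the trigger is 16 MB of
// allocation away (next_gc - heap_live) and 2000 pages are currently in use:
//
//	heapDistance := int64(16 << 20)
//	heapDistance -= 1024 * 1024 // the same safety margin as above -> 15 MB
//	pagesInUse := uint64(2000)
//	sweepPagesPerByte := float64(pagesInUse) / float64(heapDistance)
//	// ≈ 0.000127 pages swept per byte allocated: allocating through the whole
//	// 15 MB of headroom sweeps all 2000 in-use pages just before the trigger.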
  1711	
  1712	func gcCopySpans() {
  1713		// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
  1714		// resizing/freeing allspans.
  1715		// New spans can be created while GC progresses, but they are not garbage for
  1716		// this round:
  1717		//  - new stack spans can be created even while the world is stopped.
  1718		//  - new malloc spans can be created during the concurrent sweep
  1719		// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
  1720		lock(&mheap_.lock)
  1721		// Free the old cached mark array if necessary.
  1722		if work.spans != nil && &work.spans[0] != &h_allspans[0] {
  1723			sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
  1724		}
  1725		// Cache the current array for sweeping.
  1726		mheap_.gcspans = mheap_.allspans
  1727		work.spans = h_allspans
  1728		unlock(&mheap_.lock)
  1729	}
  1730	
  1731	// gcResetMarkState resets global state prior to marking (concurrent
  1732	// or STW) and resets the stack scan state of all Gs.
  1733	//
  1734	// This is safe to do without the world stopped because any Gs created
  1735	// during or after this will start out in the reset state.
  1736	func gcResetMarkState() {
  1737		// This may be called during a concurrent phase, so make sure
  1738		// allgs doesn't change.
  1739		if !(gcphase == _GCoff || gcphase == _GCmarktermination) {
  1740			// Accessing gcRescan is unsafe.
  1741			throw("bad GC phase")
  1742		}
  1743		lock(&allglock)
  1744		for _, gp := range allgs {
  1745			gp.gcscandone = false  // set to true in gcphasework
  1746			gp.gcscanvalid = false // stack has not been scanned
  1747			gp.gcRescan = -1
  1748			gp.gcAssistBytes = 0
  1749		}
  1750		unlock(&allglock)
  1751	
  1752		// Clear rescan list.
  1753		work.rescan.list = work.rescan.list[:0]
  1754	
  1755		work.bytesMarked = 0
  1756		work.initialHeapLive = memstats.heap_live
  1757		work.markrootDone = false
  1758	}
  1759	
  1760	// Hooks for other packages
  1761	
  1762	var poolcleanup func()
  1763	
  1764	//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
  1765	func sync_runtime_registerPoolCleanup(f func()) {
  1766		poolcleanup = f
  1767	}
  1768	
  1769	func clearpools() {
  1770		// clear sync.Pools
  1771		if poolcleanup != nil {
  1772			poolcleanup()
  1773		}
  1774	
  1775		// Clear central sudog cache.
  1776		// Leave per-P caches alone, they have strictly bounded size.
  1777		// Disconnect cached list before dropping it on the floor,
  1778		// so that a dangling ref to one entry does not pin all of them.
  1779		lock(&sched.sudoglock)
  1780		var sg, sgnext *sudog
  1781		for sg = sched.sudogcache; sg != nil; sg = sgnext {
  1782			sgnext = sg.next
  1783			sg.next = nil
  1784		}
  1785		sched.sudogcache = nil
  1786		unlock(&sched.sudoglock)
  1787	
  1788		// Clear central defer pools.
  1789		// Leave per-P pools alone, they have strictly bounded size.
  1790		lock(&sched.deferlock)
  1791		for i := range sched.deferpool {
  1792			// disconnect cached list before dropping it on the floor,
  1793			// so that a dangling ref to one entry does not pin all of them.
  1794			var d, dlink *_defer
  1795			for d = sched.deferpool[i]; d != nil; d = dlink {
  1796				dlink = d.link
  1797				d.link = nil
  1798			}
  1799			sched.deferpool[i] = nil
  1800		}
  1801		unlock(&sched.deferlock)
  1802	}
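
// Editorial sketch (not part of the original source): the "disconnect before
// dropping" idiom in clearpools matters because a single dangling reference to
// one cached node would otherwise keep the entire list reachable. A
// hypothetical stand-alone version of the same idiom:
//
//	type node struct {
//		next *node
//		buf  [256]byte
//	}
//
//	// dropCache breaks the links before abandoning the list, so that even if
//	// some caller still holds a *node, only that one node stays live rather
//	// than the whole chain behind it.
//	func dropCache(head *node) {
//		for n := head; n != nil; {
//			next := n.next
//			n.next = nil
//			n = next
//		}
//	}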
  1803	
  1804	// Timing
  1805	
  1806	//go:nowritebarrier
  1807	func gchelper() {
  1808		_g_ := getg()
  1809		_g_.m.traceback = 2
  1810		gchelperstart()
  1811	
  1812		if trace.enabled {
  1813			traceGCScanStart()
  1814		}
  1815	
  1816		// Parallel mark over GC roots and heap
  1817		if gcphase == _GCmarktermination {
  1818			gcw := &_g_.m.p.ptr().gcw
  1819			gcDrain(gcw, gcDrainBlock) // blocks in getfull
  1820			gcw.dispose()
  1821		}
  1822	
  1823		if trace.enabled {
  1824			traceGCScanDone()
  1825		}
  1826	
  1827		nproc := work.nproc // work.nproc can change right after we increment work.ndone
  1828		if atomic.Xadd(&work.ndone, +1) == nproc-1 {
  1829			notewakeup(&work.alldone)
  1830		}
  1831		_g_.m.traceback = 0
  1832	}
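
// Editorial sketch (not part of the original source): the ndone/alldone
// handshake above is a completion barrier: each helper increments ndone when
// it finishes, and the helper that brings the count to nproc-1 (everyone but
// the coordinator, which waits in notesleep(&work.alldone)) wakes the
// coordinator. sync.WaitGroup packages the same pattern; a hypothetical sketch
// (nhelpers and helpMark are invented names):
//
//	var wg sync.WaitGroup
//	wg.Add(nhelpers)
//	for i := 0; i < nhelpers; i++ {
//		go func() {
//			defer wg.Done() // plays the role of atomic.Xadd(&work.ndone, +1)
//			helpMark()
//		}()
//	}
//	wg.Wait() // plays the role of notesleep(&work.alldone)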
  1833	
  1834	func gchelperstart() {
  1835		_g_ := getg()
  1836	
  1837		if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
  1838			throw("gchelperstart: bad m->helpgc")
  1839		}
  1840		if _g_ != _g_.m.g0 {
  1841			throw("gchelper not running on g0 stack")
  1842		}
  1843	}
  1844	
  1845	// itoaDiv formats val/(10**dec) into buf.
  1846	func itoaDiv(buf []byte, val uint64, dec int) []byte {
  1847		i := len(buf) - 1
  1848		idec := i - dec
  1849		for val >= 10 || i >= idec {
  1850			buf[i] = byte(val%10 + '0')
  1851			i--
  1852			if i == idec {
  1853				buf[i] = '.'
  1854				i--
  1855			}
  1856			val /= 10
  1857		}
  1858		buf[i] = byte(val + '0')
  1859		return buf[i:]
  1860	}
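
// Editorial note (not part of the original source): example results of
// itoaDiv, assuming a buffer of at least 24 bytes:
//
//	var buf [24]byte
//	string(itoaDiv(buf[:], 12345, 3)) // "12.345"  (12345 / 10^3)
//	string(itoaDiv(buf[:], 42, 0))    // "42"
//
// The gctrace output above uses it (with dec=3) to print the cycle start time
// in seconds with millisecond precision.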
  1861	
  1862	// fmtNSAsMS nicely formats ns nanoseconds as milliseconds.
  1863	func fmtNSAsMS(buf []byte, ns uint64) []byte {
  1864		if ns >= 10e6 {
  1865			// Format as whole milliseconds.
  1866			return itoaDiv(buf, ns/1e6, 0)
  1867		}
  1868	// Format with two digits of precision and at most three decimal places.
  1869		x := ns / 1e3
  1870		if x == 0 {
  1871			buf[0] = '0'
  1872			return buf[:1]
  1873		}
  1874		dec := 3
  1875		for x >= 100 {
  1876			x /= 10
  1877			dec--
  1878		}
  1879		return itoaDiv(buf, x, dec)
  1880	}
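
// Editorial note (not part of the original source): example results of
// fmtNSAsMS:
//
//	var buf [24]byte
//	string(fmtNSAsMS(buf[:], 1234567))  // "1.2"  (1.234567ms, two significant digits)
//	string(fmtNSAsMS(buf[:], 45678912)) // "45"   (>= 10ms, whole milliseconds)
//	string(fmtNSAsMS(buf[:], 500))      // "0"    (truncates below 1µs)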
  1881	
