Source file src/runtime/mgcscavenge.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Scavenging free pages.
//
// This file implements scavenging (the release of physical pages backing mapped
// memory) of free and unused pages in the heap as a way to deal with page-level
// fragmentation and reduce the RSS of Go applications.
//
// Scavenging in Go happens on two fronts: there's the background
// (asynchronous) scavenger and the heap-growth (synchronous) scavenger.
//
// The former happens on a goroutine much like the background sweeper which is
// soft-capped at using scavengePercent of the mutator's time, based on
// order-of-magnitude estimates of the costs of scavenging. The background
// scavenger's primary goal is to bring the estimated heap RSS of the
// application down to a goal.
//
// That goal is defined as:
//   (retainExtraPercent+100) / 100 * (next_gc / last_next_gc) * last_heap_inuse
//
// Essentially, we wish to have the application's RSS track the heap goal, but
// the heap goal is defined in terms of bytes of objects, rather than pages like
// RSS. As a result, we need to account for fragmentation internal to
// spans. next_gc / last_next_gc defines the ratio between the current heap goal
// and the last heap goal, which tells us by how much the heap is growing and
// shrinking. We estimate what the heap will grow to in terms of pages by taking
// this ratio and multiplying it by heap_inuse at the end of the last GC, which
// allows us to account for this additional fragmentation. Note that this
// procedure makes the assumption that the degree of fragmentation won't change
// dramatically over the next GC cycle. Overestimating the amount of
// fragmentation simply results in higher memory use, which will be accounted
// for by the next pacing update. Underestimating the fragmentation, however,
// could lead to performance degradation. Handling this case is not within the
// scope of the scavenger. Situations where the amount of fragmentation balloons
// over the course of a single GC cycle should be considered pathologies,
// flagged as bugs, and fixed appropriately.
//
// An additional factor of retainExtraPercent is added as a buffer to help ensure
// that there's more unscavenged memory to allocate out of, since each allocation
// out of scavenged memory incurs a potentially expensive page fault.
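//
// As a purely illustrative example of the goal computation (the numbers are
// hypothetical): if last_heap_inuse was 400 MiB, last_next_gc was 500 MiB, and
// next_gc is 600 MiB, then the heap is projected to grow by a factor of
// 600/500 = 1.2, giving an estimated 480 MiB of in-use pages. Adding the
// retainExtraPercent (10%) buffer yields a retention goal of roughly 528 MiB.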
//
// The goal is updated after each GC and the scavenger's pacing parameters
// (which live in mheap_) are updated to match. The pacing parameters work much
// like the background sweeping parameters. The parameters define a line whose
// horizontal axis is time and vertical axis is estimated heap RSS, and the
// scavenger attempts to stay below that line at all times.
//
// The synchronous heap-growth scavenging happens whenever the heap grows in
// size, for some definition of heap-growth. The intuition behind this is that
// the application had to grow the heap because existing fragments were
// not sufficiently large to satisfy a page-level memory allocation, so we
// scavenge those fragments eagerly to offset the growth in RSS that results.

package runtime

const (
	// The background scavenger is paced according to these parameters.
	//
	// scavengePercent represents the portion of mutator time we're willing
	// to spend on scavenging in percent.
	//
	// scavengePageLatency is a worst-case estimate (order-of-magnitude) of
	// the time it takes to scavenge one (regular-sized) page of memory.
	// scavengeHugePageLatency is the same but for huge pages.
	//
	// scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
	// and represents the average time we aim for between scavenging individual
	// pages. scavengeHugePagePeriod is the same but for huge pages.
	// These constants are core to the scavenge pacing algorithm.
	scavengePercent         = 1    // 1%
	scavengePageLatency     = 10e3 // 10µs
	scavengeHugePageLatency = 10e3 // 10µs
	scavengePagePeriod      = scavengePageLatency / (scavengePercent / 100.0)
	scavengeHugePagePeriod  = scavengeHugePageLatency / (scavengePercent / 100.0)
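	// With the values above, both periods work out to 10e3 / (1/100) = 1e6 ns:
	// scavenging one page is estimated to cost about 10µs, so releasing roughly
	// one page per millisecond keeps the scavenger's CPU usage near
	// scavengePercent (1%) of mutator time.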

	// retainExtraPercent represents the amount of memory over the heap goal
	// that the scavenger should keep as a buffer space for the allocator.
	//
	// The purpose of maintaining this overhead is to have a greater pool of
	// unscavenged memory available for allocation (since using scavenged memory
	// incurs an additional cost), to account for heap fragmentation and
	// the ever-changing layout of the heap.
	retainExtraPercent = 10
)

// heapRetained returns an estimate of the current heap RSS.
//
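// For example (hypothetical numbers), if 512 MiB has been mapped for the heap
// (heap_sys) and 128 MiB of that has been returned to the OS (heap_released),
// the estimated heap RSS is 512 - 128 = 384 MiB.
//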
// mheap_.lock must be held or the world must be stopped.
func heapRetained() uint64 {
	return memstats.heap_sys - memstats.heap_released
}

// gcPaceScavenger updates the scavenger's pacing, particularly
// its rate and RSS goal.
//
// The RSS goal is based on the current heap goal with a small overhead
// to accommodate non-determinism in the allocator.
//
// The pacing is based on scavengePagePeriod and scavengeHugePagePeriod, which
// apply to regular and huge pages respectively. See those constants for more
// information.
//
// mheap_.lock must be held or the world must be stopped.
func gcPaceScavenger() {
	// If we're called before the first GC has completed, disable scavenging.
	// We never scavenge before the 2nd GC cycle anyway (we don't have enough
	// information about the heap yet) so this is fine, and avoids a fault
	// or garbage data later.
	if memstats.last_next_gc == 0 {
		mheap_.scavengeBytesPerNS = 0
		return
	}
	// Compute our scavenging goal.
	goalRatio := float64(memstats.next_gc) / float64(memstats.last_next_gc)
	retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio)
	// Add retainExtraPercent overhead to retainedGoal. This calculation
	// looks strange but the purpose is to arrive at an integer division
	// (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8)
	// that also avoids the overflow from a multiplication.
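	//
	// With retainExtraPercent = 10 the divisor is 10, so the line below is
	// equivalent to retainedGoal += retainedGoal/10, i.e. a 10% buffer, without
	// ever forming the product retainedGoal*retainExtraPercent, which the
	// comment above notes could overflow.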
	retainedGoal += retainedGoal / (1.0 / (retainExtraPercent / 100.0))
	// Align it to a physical page boundary to make the following calculations
	// a bit more exact.
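	// For instance, with a 4 KiB physical page, a goal of 10000 bytes rounds up
	// to 12288 bytes; (x + physPageSize - 1) &^ (physPageSize - 1) is the usual
	// round-up trick for power-of-two sizes.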
	retainedGoal = (retainedGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1)

	// Represents where we are now in the heap's contribution to RSS in bytes.
	//
	// Guaranteed to always be a multiple of physPageSize on systems where
	// physPageSize <= pageSize, since we map heap_sys at a rate larger than
	// any physPageSize and release memory in multiples of the physPageSize.
	//
	// However, certain functions recategorize heap_sys as other stats (e.g.
	// stack_sys) and this happens in multiples of pageSize, so on systems
	// where physPageSize > pageSize the calculations below will not be exact.
	// Generally this is OK since we'll be off by at most one regular
	// physical page.
	retainedNow := heapRetained()

	// If we're already below our goal, publish the goal in case it changed
	// then disable the background scavenger.
	if retainedNow <= retainedGoal {
		mheap_.scavengeRetainedGoal = retainedGoal
		mheap_.scavengeBytesPerNS = 0
		return
	}

	// Now we start to compute the total amount of work necessary and the total
	// amount of time we're willing to give the scavenger to complete this work.
	// This will involve calculating how much of the work consists of huge pages
	// and how much consists of regular pages since the former can let us scavenge
	// more memory in the same time.
	totalWork := retainedNow - retainedGoal

	// On systems without huge page support, all work is regular work.
	regularWork := totalWork
	hugeTime := uint64(0)

	// On systems where we have huge pages, we want to do as much of the
	// scavenging work as possible on huge pages, because the costs are the
	// same per page, but we can give back more memory in a shorter
	// period of time.
	if physHugePageSize != 0 {
		// Start by computing the amount of free memory we have in huge pages
		// in total. Trivially, this is all the huge page work we need to do.
		hugeWork := uint64(mheap_.free.unscavHugePages) << physHugePageShift

		// ...but it could turn out that there's more huge work to do than
		// total work, so cap it at total work. This might happen for very large
		// heaps where the additional factor of retainExtraPercent can make it so
		// that there are free chunks of memory larger than a huge page that we don't want
		// to scavenge.
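		//
		// For example (hypothetical numbers), with 2 MiB huge pages, if totalWork
		// is 3 MiB but 8 MiB of free memory is backed by huge pages, we only want
		// 3 MiB of it, rounded down to a whole number of huge pages (2 MiB).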
		if hugeWork >= totalWork {
			hugePages := totalWork >> physHugePageShift
			hugeWork = hugePages << physHugePageShift
		}
		// Everything that's not huge work is regular work. At this point we
		// know the huge work, so we can calculate how much time it will take
		// based on scavengeHugePagePeriod (the per-page period is the same
		// regardless of page size).
		regularWork = totalWork - hugeWork
		hugeTime = (hugeWork >> physHugePageShift) * scavengeHugePagePeriod
	}
	// Finally, we can compute how much time it'll take to do the regular work
	// and the total time to do all the work.
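	//
	// As a rough, purely illustrative example: 64 MiB of regular work on a
	// system with 4 KiB pages is 16384 pages, and at scavengePagePeriod (1e6 ns
	// per page) that corresponds to a time budget of about 16.4 seconds, i.e. a
	// rate of roughly 4 MB of released memory per second.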
	regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
	totalTime := hugeTime + regularTime

	now := nanotime()

	// Update all the pacing parameters in mheap and bump the scavenge
	// generation so the scavenger, which reads these fields under mheap_.lock,
	// notices that its pacing has changed.
	mheap_.scavengeRetainedGoal = retainedGoal
	mheap_.scavengeRetainedBasis = retainedNow
	mheap_.scavengeTimeBasis = now
	mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
	mheap_.scavengeGen++ // increase scavenge generation
}

// Sleep/wait state of the background scavenger.
var scavenge struct {
	lock   mutex
	g      *g
	parked bool
	timer  *timer

	// Generation counter.
	//
	// It represents the last generation count (as defined by
	// mheap_.scavengeGen) checked by the scavenger and is updated
	// each time the scavenger checks whether it is on-pace.
	//
	// Skew between this field and mheap_.scavengeGen is used to
	// determine whether a new update is available.
	//
	// Protected by mheap_.lock.
	gen uint64
}

// wakeScavenger unparks the scavenger if necessary. It must be called
// after any pacing update.
//
// mheap_.lock and scavenge.lock must not be held.
func wakeScavenger() {
	lock(&scavenge.lock)
	if scavenge.parked {
		// Try to stop the timer but we don't really care if we succeed.
		// It's possible that either a timer was never started, or that
		// we're racing with it.
		// In the case that we're racing with it, there's a small chance that
		// we experience a spurious wake-up of the scavenger, but that's
		// totally safe.
		stopTimer(scavenge.timer)

		// Unpark the goroutine and tell it that there may have been a pacing
		// change.
		scavenge.parked = false
		ready(scavenge.g, 0, true)
	}
	unlock(&scavenge.lock)
}

// scavengeSleep attempts to put the scavenger to sleep for ns.
//
// Note that this function should only be called by the scavenger.
//
// The scavenger may be woken up earlier by a pacing change, and it may not go
// to sleep at all if there's a pending pacing change.
//
// Returns false if awoken early (i.e. true means a complete sleep).
func scavengeSleep(ns int64) bool {
	lock(&scavenge.lock)

	// First check if there's a pending update.
	// If there is one, don't bother sleeping.
	var hasUpdate bool
	systemstack(func() {
		lock(&mheap_.lock)
		hasUpdate = mheap_.scavengeGen != scavenge.gen
		unlock(&mheap_.lock)
	})
	if hasUpdate {
		unlock(&scavenge.lock)
		return false
	}

	// Set the timer.
	//
	// This must happen here instead of inside gopark
	// because we can't close over any variables without
	// failing escape analysis.
	now := nanotime()
	scavenge.timer.when = now + ns
	startTimer(scavenge.timer)

	// Mark ourself as asleep and go to sleep.
	scavenge.parked = true
	goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)

	// Return true if we completed the full sleep.
	return (nanotime() - now) >= ns
}

// Background scavenger.
//
// The background scavenger maintains the RSS of the application below
// the line described by the proportional scavenging statistics in
// the mheap struct.
func bgscavenge(c chan int) {
	scavenge.g = getg()

	lock(&scavenge.lock)
	scavenge.parked = true

	scavenge.timer = new(timer)
	scavenge.timer.f = func(_ interface{}, _ uintptr) {
		wakeScavenger()
	}

	c <- 1
	goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)

	// Parameters for sleeping.
	//
	// If we end up doing more work than we need, we should avoid spinning
	// until there is more work to do: since we know exactly how much time must
	// pass before more work needs to be done, we simply sleep for that long.
	//
	// We should avoid sleeping for less than minSleepNS: for waits that short,
	// just calling Gosched(), overheads and all, works out better.
	//
	// There's no reason to set a maximum on sleep time because we'll always
	// get woken up earlier if there's any kind of update that could change
	// the scavenger's pacing.
	//
	// retryDelayNS tracks how much to sleep next time we fail to do any
	// useful work.
	const minSleepNS = int64(100 * 1000) // 100 µs

	retryDelayNS := minSleepNS

	for {
		released := uintptr(0)
		park := false
		ttnext := int64(0)

		// Run on the system stack since we grab the heap lock,
		// and a stack growth with the heap lock means a deadlock.
		systemstack(func() {
			lock(&mheap_.lock)

			// Update the last generation count that the scavenger has handled.
			scavenge.gen = mheap_.scavengeGen

			// If background scavenging is disabled or if there's no work to do just park.
			retained := heapRetained()
			if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
				unlock(&mheap_.lock)
				park = true
				return
			}

			// Calculate how big we want the retained heap to be
			// at this point in time.
			//
			// The formula is that of a line, y = b - mx.
			// We want y (want), where
			//   m = scavengeBytesPerNS (> 0)
			//   x = time between scavengeTimeBasis and now
			//   b = scavengeRetainedBasis
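			//
			// For instance (hypothetical numbers), with a rate of 0.004 bytes/ns
			// and 250ms elapsed since scavengeTimeBasis, rdist is 1 MB, so the
			// line says we should be retaining 1 MB less than we did at the
			// basis point.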
			rate := mheap_.scavengeBytesPerNS
			tdist := nanotime() - mheap_.scavengeTimeBasis
			rdist := uint64(rate * float64(tdist))
			want := mheap_.scavengeRetainedBasis - rdist

			// If we're above the line, scavenge to get below the
			// line.
			if retained > want {
				released = mheap_.scavengeLocked(uintptr(retained - want))
			}
			unlock(&mheap_.lock)

			// If we over-scavenged a bit, calculate how much time it'll
			// take at the current rate for us to make that up. We definitely
			// won't have any work to do until at least that amount of time
			// passes.
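			//
			// For example (hypothetical numbers), if we released 2 MB more than
			// the line required and the rate is 0.004 bytes/ns, the line won't
			// catch up to us for 2e6 / 0.004 = 5e8 ns, about half a second.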
			if released > uintptr(retained-want) {
				extra := released - uintptr(retained-want)
				ttnext = int64(float64(extra) / rate)
			}
		})

		if park {
			lock(&scavenge.lock)
			scavenge.parked = true
			goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
			continue
		}

		if debug.gctrace > 0 {
			if released > 0 {
				print("scvg: ", released>>20, " MB released\n")
			}
			print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
		}

		if released == 0 {
			// If we were unable to release anything, this may be because there's
			// no free memory available to scavenge. Go to sleep and try again.
			if scavengeSleep(retryDelayNS) {
				// If we successfully slept through the delay, back off exponentially.
				retryDelayNS *= 2
			}
			continue
		}
		retryDelayNS = minSleepNS

		if ttnext > 0 && ttnext > minSleepNS {
			// If there's an appreciable amount of time until the next scavenging
			// goal, just sleep. We'll get woken up if anything changes and this
			// way we avoid spinning.
			scavengeSleep(ttnext)
			continue
		}

		// Give something else a chance to run, no locks are held.
		Gosched()
	}
}