Source file src/runtime/mgcscavenge.go

Documentation: runtime

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Scavenging free pages.
     6  //
     7  // This file implements scavenging (the release of physical pages backing mapped
     8  // memory) of free and unused pages in the heap as a way to deal with page-level
     9  // fragmentation and reduce the RSS of Go applications.
    10  //
    11  // Scavenging in Go happens on two fronts: there's the background
    12  // (asynchronous) scavenger and the heap-growth (synchronous) scavenger.
    13  //
    14  // The former happens on a goroutine much like the background sweeper which is
    15  // soft-capped at using scavengePercent of the mutator's time, based on
    16  // order-of-magnitude estimates of the costs of scavenging. The background
    17  // scavenger's primary goal is to bring the estimated heap RSS of the
    18  // application down to a goal.
    19  //
    20  // That goal is defined as (retainExtraPercent+100) / 100 * next_gc.
    21  //
    22  // The goal is updated after each GC and the scavenger's pacing parameters
    23  // (which live in mheap_) are updated to match. The pacing parameters work much
    24  // like the background sweeping parameters. The parameters define a line whose
    25  // horizontal axis is time and vertical axis is estimated heap RSS, and the
    26  // scavenger attempts to stay below that line at all times.
    27  //
    28  // The synchronous heap-growth scavenging happens whenever the heap grows in
    29  // size, for some definition of heap-growth. The intuition behind this is that
    30  // the application had to grow the heap because existing fragments were
    31  // not sufficiently large to satisfy a page-level memory allocation, so we
    32  // scavenge those fragments eagerly to offset the growth in RSS that results.
    33  
    34  package runtime
    35  
    36  const (
    37  	// The background scavenger is paced according to these parameters.
    38  	//
    39  	// scavengePercent represents the portion of mutator time we're willing
    40  	// to spend on scavenging in percent.
    41  	//
    42  	// scavengePageLatency is a worst-case estimate (order-of-magnitude) of
    43  	// the time it takes to scavenge one (regular-sized) page of memory.
    44  	// scavengeHugePageLatency is the same but for huge pages.
    45  	//
    46  	// scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
    47  	// and represents the average time between scavenging one page that we're
    48  	// aiming for. scavengeHugePagePeriod is the same but for huge pages.
    49  	// These constants are core to the scavenge pacing algorithm.
    50  	scavengePercent         = 1    // 1%
    51  	scavengePageLatency     = 10e3 // 10µs
    52  	scavengeHugePageLatency = 10e3 // 10µs
    53  	scavengePagePeriod      = scavengePageLatency / (scavengePercent / 100.0)
    54  	scavengeHugePagePeriod  = scavengePageLatency / (scavengePercent / 100.0)
    55  
    56  	// retainExtraPercent represents the amount of memory over the heap goal
    57  	// that the scavenger should keep as a buffer space for the allocator.
    58  	//
    59  	// The purpose of maintaining this overhead is to have a greater pool of
    60  	// unscavenged memory available for allocation (since using scavenged memory
    61  	// incurs an additional cost), to account for heap fragmentation and
    62  	// the ever-changing layout of the heap.
    63  	retainExtraPercent = 10
    64  )
    65  
    66  // heapRetained returns an estimate of the current heap RSS.
    67  //
    68  // mheap_.lock must be held or the world must be stopped.
    69  func heapRetained() uint64 {
    70  	return memstats.heap_sys - memstats.heap_released
    71  }
    72  
    73  // gcPaceScavenger updates the scavenger's pacing, particularly
    74  // its rate and RSS goal.
    75  //
    76  // The RSS goal is based on the current heap goal with a small overhead
    77  // to accomodate non-determinism in the allocator.
    78  //
    79  // The pacing is based on scavengePageRate, which applies to both regular and
    80  // huge pages. See that constant for more information.
    81  //
    82  // mheap_.lock must be held or the world must be stopped.
    83  func gcPaceScavenger() {
    84  	// Compute our scavenging goal and align it to a physical page boundary
    85  	// to make the following calculations more exact.
    86  	retainedGoal := memstats.next_gc
    87  	// Add retainExtraPercent overhead to retainedGoal. This calculation
    88  	// looks strange but the purpose is to arrive at an integer division
    89  	// (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8)
    90  	// that also avoids the overflow from a multiplication.
    91  	retainedGoal += retainedGoal / (1.0 / (retainExtraPercent / 100.0))
    92  	retainedGoal = (retainedGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1)
    93  
    94  	// Represents where we are now in the heap's contribution to RSS in bytes.
    95  	//
    96  	// Guaranteed to always be a multiple of physPageSize on systems where
    97  	// physPageSize <= pageSize since we map heap_sys at a rate larger than
    98  	// any physPageSize and released memory in multiples of the physPageSize.
    99  	//
   100  	// However, certain functions recategorize heap_sys as other stats (e.g.
   101  	// stack_sys) and this happens in multiples of pageSize, so on systems
   102  	// where physPageSize > pageSize the calculations below will not be exact.
   103  	// Generally this is OK since we'll be off by at most one regular
   104  	// physical page.
   105  	retainedNow := heapRetained()
   106  
   107  	// If we're already below our goal, publish the goal in case it changed
   108  	// then disable the background scavenger.
   109  	if retainedNow <= retainedGoal {
   110  		mheap_.scavengeRetainedGoal = retainedGoal
   111  		mheap_.scavengeBytesPerNS = 0
   112  		return
   113  	}
   114  
   115  	// Now we start to compute the total amount of work necessary and the total
   116  	// amount of time we're willing to give the scavenger to complete this work.
   117  	// This will involve calculating how much of the work consists of huge pages
   118  	// and how much consists of regular pages since the former can let us scavenge
   119  	// more memory in the same time.
   120  	totalWork := retainedNow - retainedGoal
   121  
   122  	// On systems without huge page support, all work is regular work.
   123  	regularWork := totalWork
   124  	hugeTime := uint64(0)
   125  
   126  	// On systems where we have huge pages, we want to do as much of the
   127  	// scavenging work as possible on huge pages, because the costs are the
   128  	// same per page, but we can give back more more memory in a shorter
   129  	// period of time.
   130  	if physHugePageSize != 0 {
   131  		// Start by computing the amount of free memory we have in huge pages
   132  		// in total. Trivially, this is all the huge page work we need to do.
   133  		hugeWork := uint64(mheap_.free.unscavHugePages) << physHugePageShift
   134  
   135  		// ...but it could turn out that there's more huge work to do than
   136  		// total work, so cap it at total work. This might happen for very large
   137  		// heaps where the additional factor of retainExtraPercent can make it so
   138  		// that there are free chunks of memory larger than a huge page that we don't want
   139  		// to scavenge.
   140  		if hugeWork >= totalWork {
   141  			hugePages := totalWork >> physHugePageShift
   142  			hugeWork = hugePages << physHugePageShift
   143  		}
   144  		// Everything that's not huge work is regular work. At this point we
   145  		// know huge work so we can calculate how much time that will take
   146  		// based on scavengePageRate (which applies to pages of any size).
   147  		regularWork = totalWork - hugeWork
   148  		hugeTime = (hugeWork >> physHugePageShift) * scavengeHugePagePeriod
   149  	}
   150  	// Finally, we can compute how much time it'll take to do the regular work
   151  	// and the total time to do all the work.
   152  	regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
   153  	totalTime := hugeTime + regularTime
   154  
   155  	now := nanotime()
   156  
   157  	lock(&scavenge.lock)
   158  
   159  	// Update all the pacing parameters in mheap with scavenge.lock held,
   160  	// so that scavenge.gen is kept in sync with the updated values.
   161  	mheap_.scavengeRetainedGoal = retainedGoal
   162  	mheap_.scavengeRetainedBasis = retainedNow
   163  	mheap_.scavengeTimeBasis = now
   164  	mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
   165  	scavenge.gen++ // increase scavenge generation
   166  
   167  	// Wake up background scavenger if needed, since the pacing was just updated.
   168  	wakeScavengerLocked()
   169  
   170  	unlock(&scavenge.lock)
   171  }
   172  
   173  // State of the background scavenger.
   174  var scavenge struct {
   175  	lock   mutex
   176  	g      *g
   177  	parked bool
   178  	timer  *timer
   179  	gen    uint32 // read with either lock or mheap_.lock, write with both
   180  }
   181  
   182  // wakeScavengerLocked unparks the scavenger if necessary. It must be called
   183  // after any pacing update.
   184  //
   185  // scavenge.lock must be held.
   186  func wakeScavengerLocked() {
   187  	if scavenge.parked {
   188  		// Try to stop the timer but we don't really care if we succeed.
   189  		// It's possible that either a timer was never started, or that
   190  		// we're racing with it.
   191  		// In the case that we're racing with there's the low chance that
   192  		// we experience a spurious wake-up of the scavenger, but that's
   193  		// totally safe.
   194  		stopTimer(scavenge.timer)
   195  
   196  		// Unpark the goroutine and tell it that there may have been a pacing
   197  		// change.
   198  		scavenge.parked = false
   199  		ready(scavenge.g, 0, true)
   200  	}
   201  }
   202  
   203  // scavengeSleep attempts to put the scavenger to sleep for ns.
   204  // It also checks to see if gen != scavenge.gen before going to sleep,
   205  // and aborts if true (meaning an update had occurred).
   206  //
   207  // Note that this function should only be called by the scavenger.
   208  //
   209  // The scavenger may be woken up earlier by a pacing change, and it may not go
   210  // to sleep at all if there's a pending pacing change.
   211  //
   212  // Returns false if awoken early (i.e. true means a complete sleep).
   213  func scavengeSleep(gen uint32, ns int64) bool {
   214  	lock(&scavenge.lock)
   215  
   216  	// If there was an update, just abort the sleep.
   217  	if scavenge.gen != gen {
   218  		unlock(&scavenge.lock)
   219  		return false
   220  	}
   221  
   222  	// Set the timer.
   223  	now := nanotime()
   224  	scavenge.timer.when = now + ns
   225  	startTimer(scavenge.timer)
   226  
   227  	// Park the goroutine. It's fine that we don't publish the
   228  	// fact that the timer was set; even if the timer wakes up
   229  	// and fire scavengeReady before we park, it'll block on
   230  	// scavenge.lock.
   231  	scavenge.parked = true
   232  	goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
   233  
   234  	// Return true if we completed the full sleep.
   235  	return (nanotime() - now) >= ns
   236  }
   237  
   238  // Background scavenger.
   239  //
   240  // The background scavenger maintains the RSS of the application below
   241  // the line described by the proportional scavenging statistics in
   242  // the mheap struct.
   243  func bgscavenge(c chan int) {
   244  	scavenge.g = getg()
   245  
   246  	lock(&scavenge.lock)
   247  	scavenge.parked = true
   248  
   249  	scavenge.timer = new(timer)
   250  	scavenge.timer.f = func(_ interface{}, _ uintptr) {
   251  		lock(&scavenge.lock)
   252  		wakeScavengerLocked()
   253  		unlock(&scavenge.lock)
   254  	}
   255  
   256  	c <- 1
   257  	goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
   258  
   259  	// Parameters for sleeping.
   260  	//
   261  	// If we end up doing more work than we need, we should avoid spinning
   262  	// until we have more work to do: instead, we know exactly how much time
   263  	// until more work will need to be done, so we sleep.
   264  	//
   265  	// We should avoid sleeping for less than minSleepNS because Gosched()
   266  	// overheads among other things will work out better in that case.
   267  	//
   268  	// There's no reason to set a maximum on sleep time because we'll always
   269  	// get woken up earlier if there's any kind of update that could change
   270  	// the scavenger's pacing.
   271  	//
   272  	// retryDelayNS tracks how much to sleep next time we fail to do any
   273  	// useful work.
   274  	const minSleepNS = int64(100 * 1000) // 100 µs
   275  
   276  	retryDelayNS := minSleepNS
   277  
   278  	for {
   279  		released := uintptr(0)
   280  		park := false
   281  		ttnext := int64(0)
   282  		gen := uint32(0)
   283  
   284  		// Run on the system stack since we grab the heap lock,
   285  		// and a stack growth with the heap lock means a deadlock.
   286  		systemstack(func() {
   287  			lock(&mheap_.lock)
   288  
   289  			gen = scavenge.gen
   290  
   291  			// If background scavenging is disabled or if there's no work to do just park.
   292  			retained := heapRetained()
   293  			if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
   294  				unlock(&mheap_.lock)
   295  				park = true
   296  				return
   297  			}
   298  
   299  			// Calculate how big we want the retained heap to be
   300  			// at this point in time.
   301  			//
   302  			// The formula is for that of a line, y = b - mx
   303  			// We want y (want),
   304  			//   m = scavengeBytesPerNS (> 0)
   305  			//   x = time between scavengeTimeBasis and now
   306  			//   b = scavengeRetainedBasis
   307  			rate := mheap_.scavengeBytesPerNS
   308  			tdist := nanotime() - mheap_.scavengeTimeBasis
   309  			rdist := uint64(rate * float64(tdist))
   310  			want := mheap_.scavengeRetainedBasis - rdist
   311  
   312  			// If we're above the line, scavenge to get below the
   313  			// line.
   314  			if retained > want {
   315  				released = mheap_.scavengeLocked(uintptr(retained - want))
   316  			}
   317  			unlock(&mheap_.lock)
   318  
   319  			// If we over-scavenged a bit, calculate how much time it'll
   320  			// take at the current rate for us to make that up. We definitely
   321  			// won't have any work to do until at least that amount of time
   322  			// passes.
   323  			if released > uintptr(retained-want) {
   324  				extra := released - uintptr(retained-want)
   325  				ttnext = int64(float64(extra) / rate)
   326  			}
   327  		})
   328  
   329  		if park {
   330  			lock(&scavenge.lock)
   331  			scavenge.parked = true
   332  			goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
   333  			continue
   334  		}
   335  
   336  		if debug.gctrace > 0 {
   337  			if released > 0 {
   338  				print("scvg: ", released>>20, " MB released\n")
   339  			}
   340  			print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
   341  		}
   342  
   343  		if released == 0 {
   344  			// If we were unable to release anything this may be because there's
   345  			// no free memory available to scavenge. Go to sleep and try again.
   346  			if scavengeSleep(gen, retryDelayNS) {
   347  				// If we successfully slept through the delay, back off exponentially.
   348  				retryDelayNS *= 2
   349  			}
   350  			continue
   351  		}
   352  		retryDelayNS = minSleepNS
   353  
   354  		if ttnext > 0 && ttnext > minSleepNS {
   355  			// If there's an appreciable amount of time until the next scavenging
   356  			// goal, just sleep. We'll get woken up if anything changes and this
   357  			// way we avoid spinning.
   358  			scavengeSleep(gen, ttnext)
   359  			continue
   360  		}
   361  
   362  		// Give something else a chance to run, no locks are held.
   363  		Gosched()
   364  	}
   365  }
   366  

View as plain text