...
Run Format

Source file src/runtime/mem_linux.go

Documentation: runtime

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"runtime/internal/atomic"
     9  	"runtime/internal/sys"
    10  	"unsafe"
    11  )
    12  
    13  const (
    14  	_EACCES = 13
    15  	_EINVAL = 22
    16  )
    17  
    18  // Don't split the stack as this method may be invoked without a valid G, which
    19  // prevents us from allocating more stack.
    20  //go:nosplit
    21  func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
    22  	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
    23  	if err != 0 {
    24  		if err == _EACCES {
    25  			print("runtime: mmap: access denied\n")
    26  			exit(2)
    27  		}
    28  		if err == _EAGAIN {
    29  			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
    30  			exit(2)
    31  		}
    32  		return nil
    33  	}
    34  	mSysStatInc(sysStat, n)
    35  	return p
    36  }
    37  
    38  var adviseUnused = uint32(_MADV_FREE)
    39  
    40  func sysUnused(v unsafe.Pointer, n uintptr) {
    41  	// By default, Linux's "transparent huge page" support will
    42  	// merge pages into a huge page if there's even a single
    43  	// present regular page, undoing the effects of madvise(adviseUnused)
    44  	// below. On amd64, that means khugepaged can turn a single
    45  	// 4KB page to 2MB, bloating the process's RSS by as much as
    46  	// 512X. (See issue #8832 and Linux kernel bug
    47  	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
    48  	//
    49  	// To work around this, we explicitly disable transparent huge
    50  	// pages when we release pages of the heap. However, we have
    51  	// to do this carefully because changing this flag tends to
    52  	// split the VMA (memory mapping) containing v in to three
    53  	// VMAs in order to track the different values of the
    54  	// MADV_NOHUGEPAGE flag in the different regions. There's a
    55  	// default limit of 65530 VMAs per address space (sysctl
    56  	// vm.max_map_count), so we must be careful not to create too
    57  	// many VMAs (see issue #12233).
    58  	//
    59  	// Since huge pages are huge, there's little use in adjusting
    60  	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
    61  	// exploding the number of VMAs by only adjusting the
    62  	// MADV_NOHUGEPAGE flag on a large granularity. This still
    63  	// gets most of the benefit of huge pages while keeping the
    64  	// number of VMAs under control. With hugePageSize = 2MB, even
    65  	// a pessimal heap can reach 128GB before running out of VMAs.
    66  	if sys.HugePageSize != 0 {
    67  		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(
    68  
    69  		// If it's a large allocation, we want to leave huge
    70  		// pages enabled. Hence, we only adjust the huge page
    71  		// flag on the huge pages containing v and v+n-1, and
    72  		// only if those aren't aligned.
    73  		var head, tail uintptr
    74  		if uintptr(v)%s != 0 {
    75  			// Compute huge page containing v.
    76  			head = uintptr(v) &^ (s - 1)
    77  		}
    78  		if (uintptr(v)+n)%s != 0 {
    79  			// Compute huge page containing v+n-1.
    80  			tail = (uintptr(v) + n - 1) &^ (s - 1)
    81  		}
    82  
    83  		// Note that madvise will return EINVAL if the flag is
    84  		// already set, which is quite likely. We ignore
    85  		// errors.
    86  		if head != 0 && head+sys.HugePageSize == tail {
    87  			// head and tail are different but adjacent,
    88  			// so do this in one call.
    89  			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
    90  		} else {
    91  			// Advise the huge pages containing v and v+n-1.
    92  			if head != 0 {
    93  				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
    94  			}
    95  			if tail != 0 && tail != head {
    96  				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
    97  			}
    98  		}
    99  	}
   100  
   101  	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
   102  		// madvise will round this to any physical page
   103  		// *covered* by this range, so an unaligned madvise
   104  		// will release more memory than intended.
   105  		throw("unaligned sysUnused")
   106  	}
   107  
   108  	var advise uint32
   109  	if debug.madvdontneed != 0 {
   110  		advise = _MADV_DONTNEED
   111  	} else {
   112  		advise = atomic.Load(&adviseUnused)
   113  	}
   114  	if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
   115  		// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
   116  		// not supported.
   117  		atomic.Store(&adviseUnused, _MADV_DONTNEED)
   118  		madvise(v, n, _MADV_DONTNEED)
   119  	}
   120  }
   121  
   122  func sysUsed(v unsafe.Pointer, n uintptr) {
   123  	if sys.HugePageSize != 0 {
   124  		// Partially undo the NOHUGEPAGE marks from sysUnused
   125  		// for whole huge pages between v and v+n. This may
   126  		// leave huge pages off at the end points v and v+n
   127  		// even though allocations may cover these entire huge
   128  		// pages. We could detect this and undo NOHUGEPAGE on
   129  		// the end points as well, but it's probably not worth
   130  		// the cost because when neighboring allocations are
   131  		// freed sysUnused will just set NOHUGEPAGE again.
   132  		var s uintptr = sys.HugePageSize
   133  
   134  		// Round v up to a huge page boundary.
   135  		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
   136  		// Round v+n down to a huge page boundary.
   137  		end := (uintptr(v) + n) &^ (s - 1)
   138  
   139  		if beg < end {
   140  			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
   141  		}
   142  	}
   143  }
   144  
   145  // Don't split the stack as this function may be invoked without a valid G,
   146  // which prevents us from allocating more stack.
   147  //go:nosplit
   148  func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
   149  	mSysStatDec(sysStat, n)
   150  	munmap(v, n)
   151  }
   152  
   153  func sysFault(v unsafe.Pointer, n uintptr) {
   154  	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
   155  }
   156  
   157  func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
   158  	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
   159  	if err != 0 {
   160  		return nil
   161  	}
   162  	return p
   163  }
   164  
   165  func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
   166  	mSysStatInc(sysStat, n)
   167  
   168  	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
   169  	if err == _ENOMEM {
   170  		throw("runtime: out of memory")
   171  	}
   172  	if p != v || err != 0 {
   173  		throw("runtime: cannot map pages in arena address space")
   174  	}
   175  }
   176  

View as plain text