Source file src/runtime/mem_linux.go

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)
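
// Other errno values used in this file (_ENOMEM, _EAGAIN) are defined
// elsewhere in the runtime, in the generated defs_linux_*.go files.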

// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte

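// addrspace_free reports whether the n bytes of address space
// beginning at v appear to be unmapped.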
func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	for off := uintptr(0); off < n; off += physPageSize {
		// Use a length of 1 byte, which the kernel will round
		// up to one physical page regardless of the true
		// physical page size.
		errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0])
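		// The runtime's mincore wrapper returns a negative errno
		// on failure, hence the negated comparisons below.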
		if errval == -_EINVAL {
			// Address is not a multiple of the physical
			// page size. Shouldn't happen, but just ignore it.
			continue
		}
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != -_ENOMEM {
			return false
		}
	}
	return true
}

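// mmap_fixed tries to map n bytes at exactly address v. If the kernel
// ignores the hint but the range is still unmapped, the stray mapping
// is discarded and the call is retried with MAP_FIXED.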
func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
	p := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
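		// Small return values are errno codes, not mappings,
		// so only unmap when p looks like a real address.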
		if uintptr(p) > 4096 {
			munmap(p, n)
		}
		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p
}

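// sysAlloc obtains n bytes of zeroed memory from the OS and charges
// it to sysStat.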
// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
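	// On failure, the runtime's mmap wrapper returns the errno as a
	// small positive value, so anything below 4096 is an error code
	// rather than a usable address.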
	if uintptr(p) < 4096 {
		if uintptr(p) == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if uintptr(p) == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}

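// sysUnused tells the OS that the contents of the memory region
// [v, v+n) are no longer needed, so its physical pages may be
// reclaimed; the virtual mapping itself stays intact.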
func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page into a 2MB huge page, bloating the process's RSS
	// by as much as 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if sys.HugePageSize != 0 {
		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}
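		// For example (hypothetical addresses), with s = 2MB,
		// v = 0x2100000 and n = 0x400000: head = 0x2000000 (the
		// huge page containing v) and tail = 0x2400000 (the huge
		// page containing v+n-1), so each gets its own madvise
		// call below.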

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+sys.HugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	madvise(v, n, _MADV_DONTNEED)
}

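// sysUsed notifies the OS that the memory region is needed again.
// On Linux, pages released with MADV_DONTNEED are faulted back in
// automatically on access, so all that is left to do is re-enable
// transparent huge pages where it is safe.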
func sysUsed(v unsafe.Pointer, n uintptr) {
	if sys.HugePageSize != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = sys.HugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)
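		// E.g. (hypothetical), with s = 2MB, v = 0x2100000 and
		// n = 0x400000: beg = 0x2200000 and end = 0x2400000, so
		// exactly one whole huge page is re-enabled.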

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

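// sysFree unconditionally returns the memory region to the OS and
// decrements sysStat accordingly.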
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

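// sysFault remaps the region with PROT_NONE so that any access to it
// faults. It is used only for debugging the runtime.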
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

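// sysReserve reserves address space without allocating any physical
// memory. *reserved is set to false when the space has merely been
// checked to be available (large 64-bit reservations) rather than
// actually mapped.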
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if p != v {
			if uintptr(p) >= 4096 {
				munmap(p, 64<<10)
			}
			return nil
		}
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if uintptr(p) < 4096 {
		return nil
	}
	*reserved = true
	return p
}

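// sysMap makes the previously reserved region [v, v+n) ready for use,
// mapping it read/write. reserved reports whether sysReserve actually
// created a PROT_NONE mapping for it.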
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
	if !reserved {
		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if uintptr(p) == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v {
			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
			throw("runtime: address space conflict")
		}
		return
	}

	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if uintptr(p) == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v {
		throw("runtime: cannot map pages in arena address space")
	}
}
