Source file src/runtime/mem_linux.go

  // Copyright 2010 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package runtime
  
  import (
  	"runtime/internal/sys"
  	"unsafe"
  )
  
  const (
  	_EACCES = 13
  	_EINVAL = 22
  )
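  
  // The other errno constants used below (_EAGAIN, _ENOMEM) are defined
  // in the per-GOARCH defs_linux_*.go files.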
  
  // NOTE: vec must be just 1 byte long here.
  // Mincore returns ENOMEM if any of the pages are unmapped,
  // but we want to know that all of the pages are unmapped.
  // To make these the same, we can only ask about one page
  // at a time. See golang.org/issue/7476.
  var addrspace_vec [1]byte
  
  func addrspace_free(v unsafe.Pointer, n uintptr) bool {
  	for off := uintptr(0); off < n; off += physPageSize {
  		// Use a length of 1 byte, which the kernel will round
  		// up to one physical page regardless of the true
  		// physical page size.
  		errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0])
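  		// The runtime's mincore wrapper returns the raw syscall
  		// result, so errors appear as negative errno values.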
  		if errval == -_EINVAL {
  			// Address is not a multiple of the physical
  			// page size. Shouldn't happen, but just ignore it.
  			continue
  		}
  		// ENOMEM means unmapped, which is what we want.
  		// Anything else we assume means the pages are mapped.
  		if errval != -_ENOMEM {
  			return false
  		}
  	}
  	return true
  }
  
  func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
  	p := mmap(v, n, prot, flags, fd, offset)
  	// On some systems, mmap ignores v without
  	// MAP_FIXED, so retry if the address space is free.
  	if p != v && addrspace_free(v, n) {
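  		// On failure the runtime's mmap wrapper returns errno as
  		// a small positive value rather than an address, so only
  		// munmap p if it looks like a real address.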
  		if uintptr(p) > 4096 {
  			munmap(p, n)
  		}
  		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
  	}
  	return p
  }
  
  // Don't split the stack as this function may be invoked without a valid G, which
  // prevents us from allocating more stack.
  //go:nosplit
  func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
  	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
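  	// Results below 4096 are errno values, not addresses.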
  	if uintptr(p) < 4096 {
  		if uintptr(p) == _EACCES {
  			print("runtime: mmap: access denied\n")
  			exit(2)
  		}
  		if uintptr(p) == _EAGAIN {
  			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
  			exit(2)
  		}
  		return nil
  	}
  	mSysStatInc(sysStat, n)
  	return p
  }
  
  func sysUnused(v unsafe.Pointer, n uintptr) {
  	// By default, Linux's "transparent huge page" support will
  	// merge pages into a huge page if there's even a single
  	// present regular page, undoing the effects of the DONTNEED
  	// below. On amd64, that means khugepaged can turn a single
  	// 4KB page into a 2MB huge page, bloating the process's RSS
  	// by as much as 512X. (See issue #8832 and Linux kernel bug
  	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
  	//
  	// To work around this, we explicitly disable transparent huge
  	// pages when we release pages of the heap. However, we have
  	// to do this carefully because changing this flag tends to
  	// split the VMA (memory mapping) containing v into three
  	// VMAs in order to track the different values of the
  	// MADV_NOHUGEPAGE flag in the different regions. There's a
  	// default limit of 65530 VMAs per address space (sysctl
  	// vm.max_map_count), so we must be careful not to create too
  	// many VMAs (see issue #12233).
  	//
  	// Since huge pages are huge, there's little use in adjusting
  	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
  	// exploding the number of VMAs by only adjusting the
  	// MADV_NOHUGEPAGE flag on a large granularity. This still
  	// gets most of the benefit of huge pages while keeping the
  	// number of VMAs under control. With hugePageSize = 2MB, even
  	// a pessimal heap can reach 128GB before running out of VMAs.
  	if sys.HugePageSize != 0 {
  		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(
  
  		// If it's a large allocation, we want to leave huge
  		// pages enabled. Hence, we only adjust the huge page
  		// flag on the huge pages containing v and v+n-1, and
  		// only if those aren't aligned.
  		var head, tail uintptr
  		if uintptr(v)%s != 0 {
  			// Compute huge page containing v.
  			head = uintptr(v) &^ (s - 1)
  		}
  		if (uintptr(v)+n)%s != 0 {
  			// Compute huge page containing v+n-1.
  			tail = (uintptr(v) + n - 1) &^ (s - 1)
  		}
  
  		// Note that madvise will return EINVAL if the flag is
  		// already set, which is quite likely. We ignore
  		// errors.
  		if head != 0 && head+sys.HugePageSize == tail {
  			// head and tail are different but adjacent,
  			// so do this in one call.
  			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
  		} else {
  			// Advise the huge pages containing v and v+n-1.
  			if head != 0 {
  				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
  			}
  			if tail != 0 && tail != head {
  				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
  			}
  		}
  	}
  
  	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
  		// madvise will round this to any physical page
  		// *covered* by this range, so an unaligned madvise
  		// will release more memory than intended.
  		throw("unaligned sysUnused")
  	}
  
  	madvise(v, n, _MADV_DONTNEED)
  }
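  
  // Illustrative example (not part of the original file): the head/tail
  // rounding in sysUnused, worked through for a hypothetical 2MB huge
  // page size (s = 0x200000), v = 0x2ffff000, and n = 0x200000:
  //
  //	v%s != 0, so     head = v &^ (s-1)       = 0x2fe00000
  //	(v+n)%s != 0, so tail = (v+n-1) &^ (s-1) = 0x30000000
  //
  // head+s == tail, so the two huge pages are adjacent and sysUnused
  // marks both with a single madvise call.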
  
  func sysUsed(v unsafe.Pointer, n uintptr) {
  	if sys.HugePageSize != 0 {
  		// Partially undo the NOHUGEPAGE marks from sysUnused
  		// for whole huge pages between v and v+n. This may
  		// leave huge pages off at the end points v and v+n
  		// even though allocations may cover these entire huge
  		// pages. We could detect this and undo NOHUGEPAGE on
  		// the end points as well, but it's probably not worth
  		// the cost because when neighboring allocations are
  		// freed sysUnused will just set NOHUGEPAGE again.
  		var s uintptr = sys.HugePageSize
  
  		// Round v up to a huge page boundary.
  		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
  		// Round v+n down to a huge page boundary.
  		end := (uintptr(v) + n) &^ (s - 1)
  
  		if beg < end {
  			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
  		}
  	}
  }
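  
  // Illustrative example (not part of the original file): with the same
  // hypothetical values (s = 0x200000, v = 0x2ffff000, n = 0x200000),
  // sysUsed computes
  //
  //	beg = (v + s - 1) &^ (s-1) = 0x30000000
  //	end = (v + n) &^ (s-1)     = 0x30000000
  //
  // so beg == end: the range covers no whole huge page and no madvise
  // call is made.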
  
  // Don't split the stack as this function may be invoked without a valid G,
  // which prevents us from allocating more stack.
  //go:nosplit
  func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
  	mSysStatDec(sysStat, n)
  	munmap(v, n)
  }
  
  func sysFault(v unsafe.Pointer, n uintptr) {
  	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
  }
  
  func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
  	// On 64-bit, people with ulimit -v set complain if we reserve too
  	// much address space. Instead, assume that the reservation is okay
  	// if we can reserve at least 64K and check the assumption in sysMap.
  	// Only user-mode Linux (UML) rejects these requests.
  	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
  		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
  		if p != v {
  			if uintptr(p) >= 4096 {
  				munmap(p, 64<<10)
  			}
  			return nil
  		}
  		munmap(p, 64<<10)
  		*reserved = false
  		return v
  	}
  
  	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
  	if uintptr(p) < 4096 {
  		return nil
  	}
  	*reserved = true
  	return p
  }
  
  func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
  	mSysStatInc(sysStat, n)
  
  	// On 64-bit, we don't actually have v reserved, so tread carefully.
  	if !reserved {
  		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
  		if uintptr(p) == _ENOMEM {
  			throw("runtime: out of memory")
  		}
  		if p != v {
  			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
  			throw("runtime: address space conflict")
  		}
  		return
  	}
  
  	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
  	if uintptr(p) == _ENOMEM {
  		throw("runtime: out of memory")
  	}
  	if p != v {
  		throw("runtime: cannot map pages in arena address space")
  	}
  }
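  
  // Illustrative sketch (not part of the original file): how a caller
  // might pair these primitives under the usual reserve-then-map
  // protocol. The hint address, sizes, and stat variable here are
  // hypothetical.
  //
  //	var reserved bool
  //	var stat uint64
  //	// Ask for 64GB of address space. On 64-bit this only probes a
  //	// 64KB window and sets reserved = false on success.
  //	p := sysReserve(unsafe.Pointer(uintptr(0xc000000000)), 64<<30, &reserved)
  //	if p == nil {
  //		throw("runtime: cannot reserve arena")
  //	}
  //	// Back the first 1MB with memory. When reserved is false,
  //	// sysMap re-verifies that the address range is still available.
  //	sysMap(p, 1<<20, reserved, &stat)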
  
