
Source file src/runtime/malloc.go

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Memory allocator, based on tcmalloc.
     6	// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
     7	
     8	// The main allocator works in runs of pages.
     9	// Small allocation sizes (up to and including 32 kB) are
    10	// rounded to one of about 100 size classes, each of which
    11	// has its own free list of objects of exactly that size.
    12	// Any free page of memory can be split into a set of objects
    13	// of one size class, which are then managed using free list
    14	// allocators.
    15	//
    16	// The allocator's data structures are:
    17	//
    18	//	FixAlloc: a free-list allocator for fixed-size objects,
    19	//		used to manage storage used by the allocator.
    20	//	MHeap: the malloc heap, managed at page (8192-byte) granularity.
    21	//	MSpan: a run of pages managed by the MHeap.
    22	//	MCentral: a shared free list for a given size class.
    23	//	MCache: a per-thread (in Go, per-P) cache for small objects.
    24	//	MStats: allocation statistics.
    25	//
    26	// Allocating a small object proceeds up a hierarchy of caches:
    27	//
    28	//	1. Round the size up to one of the small size classes
    29	//	   and look in the corresponding MCache free list.
    30	//	   If the list is not empty, allocate an object from it.
    31	//	   This can all be done without acquiring a lock.
    32	//
    33	//	2. If the MCache free list is empty, replenish it by
    34	//	   taking a bunch of objects from the MCentral free list.
    35	//	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
    36	//
    37	//	3. If the MCentral free list is empty, replenish it by
    38	//	   allocating a run of pages from the MHeap and then
    39	//	   chopping that memory into objects of the given size.
    40	//	   Allocating many objects amortizes the cost of locking
    41	//	   the heap.
    42	//
    43	//	4. If the MHeap is empty or has no page runs large enough,
    44	//	   allocate a new group of pages (at least 1MB) from the
    45	//	   operating system.  Allocating a large run of pages
    46	//	   amortizes the cost of talking to the operating system.
    47	//
    48	// Freeing a small object proceeds up the same hierarchy:
    49	//
    50	//	1. Look up the size class for the object and add it to
    51	//	   the MCache free list.
    52	//
    53	//	2. If the MCache free list is too long or the MCache has
    54	//	   too much memory, return some to the MCentral free lists.
    55	//
    56	//	3. If all the objects in a given span have returned to
    57	//	   the MCentral list, return that span to the page heap.
    58	//
    59	//	4. If the heap has too much memory, return some to the
    60	//	   operating system.
    61	//
    62	//	TODO(rsc): Step 4 is not implemented.
    63	//
    64	// Allocating and freeing a large object uses the page heap
    65	// directly, bypassing the MCache and MCentral free lists.
    66	//
    67	// The small objects on the MCache and MCentral free lists
    68	// may or may not be zeroed. They are zeroed if and only if
    69	// the second word of the object is zero. A span in the
    70	// page heap is zeroed unless s->needzero is set. When a span
    71	// is allocated to break into small objects, it is zeroed if needed
    72	// and s->needzero is set. There are two main benefits to delaying the
    73	// zeroing this way:
    74	//
    75	//	1. stack frames allocated from the small object lists
    76	//	   or the page heap can avoid zeroing altogether.
    77	//	2. the cost of zeroing when reusing a small object is
    78	//	   charged to the mutator, not the garbage collector.
    79	
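The caching hierarchy in steps 1–2 above can be illustrated with a minimal standalone sketch (invented types, separate from this file): a per-P style cache hands out objects without locking and refills itself in batches from a locked central list, so the lock is taken once per batch rather than once per object. The real MCache/MCentral types are far more involved.

	package main

	import (
		"fmt"
		"sync"
	)

	// central stands in for an MCentral: a locked free list for one size class.
	type central struct {
		mu   sync.Mutex
		free []int // object IDs standing in for free objects
	}

	// refill hands out up to n objects under a single lock acquisition,
	// amortizing the locking cost across the whole batch.
	func (c *central) refill(n int) []int {
		c.mu.Lock()
		defer c.mu.Unlock()
		if n > len(c.free) {
			n = len(c.free)
		}
		batch := c.free[:n]
		c.free = c.free[n:]
		return batch
	}

	// cache stands in for an MCache: owned by one P, so no locking is needed.
	type cache struct {
		free []int
	}

	func (m *cache) alloc(c *central) int {
		if len(m.free) == 0 {
			m.free = c.refill(8) // sketch omits growing the heap when central is empty
		}
		obj := m.free[0]
		m.free = m.free[1:]
		return obj
	}

	func main() {
		c := &central{free: []int{1, 2, 3, 4, 5}}
		m := &cache{}
		fmt.Println(m.alloc(c), m.alloc(c)) // 1 2; only the first alloc touches the lock
	}
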
    80	package runtime
    81	
    82	import (
    83		"runtime/internal/sys"
    84		"unsafe"
    85	)
    86	
    87	const (
    88		debugMalloc = false
    89	
    90		maxTinySize   = _TinySize
    91		tinySizeClass = _TinySizeClass
    92		maxSmallSize  = _MaxSmallSize
    93	
    94		pageShift = _PageShift
    95		pageSize  = _PageSize
    96		pageMask  = _PageMask
    97		// By construction, single page spans of the smallest object class
    98		// have the most objects per span.
    99		maxObjsPerSpan = pageSize / 8
   100	
   101		mSpanInUse = _MSpanInUse
   102	
   103		concurrentSweep = _ConcurrentSweep
   104	)
   105	
   106	const (
   107		_PageShift = 13
   108		_PageSize  = 1 << _PageShift
   109		_PageMask  = _PageSize - 1
   110	)
   111	
   112	const (
   113		// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
   114		_64bit = 1 << (^uintptr(0) >> 63) / 2
   115	
   116		// Computed constant. The definition of MaxSmallSize and the
   117	// algorithm in msize.go produce some number of different allocation
   118		// size classes. NumSizeClasses is that number. It's needed here
   119		// because there are static arrays of this length; when msize runs its
   120		// size choosing algorithm it double-checks that NumSizeClasses agrees.
   121		_NumSizeClasses = 67
   122	
   123		// Tunable constants.
   124		_MaxSmallSize = 32 << 10
   125	
   126		// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
   127		_TinySize      = 16
   128		_TinySizeClass = 2
   129	
   130		_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
   131		_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
   132		_HeapAllocChunk = 1 << 20                // Chunk size for heap growth
   133	
   134	// Per-P, per-order stack segment cache size.
   135		_StackCacheSize = 32 * 1024
   136	
   137		// Number of orders that get caching. Order 0 is FixedStack
   138		// and each successive order is twice as large.
   139		// We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
   140		// will be allocated directly.
   141		// Since FixedStack is different on different systems, we
   142		// must vary NumStackOrders to keep the same maximum cached size.
   143		//   OS               | FixedStack | NumStackOrders
   144		//   -----------------+------------+---------------
   145		//   linux/darwin/bsd | 2KB        | 4
   146		//   windows/32       | 4KB        | 3
   147		//   windows/64       | 8KB        | 2
   148		//   plan9            | 4KB        | 3
   149		_NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9
   150	
   151	// Number of bits in page to span calculations (8k pages).
   152		// On Windows 64-bit we limit the arena to 32GB or 35 bits.
   153		// Windows counts memory used by page table into committed memory
   154		// of the process, so we can't reserve too much memory.
   155		// See https://golang.org/issue/5402 and https://golang.org/issue/5236.
   156		// On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
   157		// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
   158		// On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
   159		// but as most devices have less than 4GB of physical memory anyway, we
   160		// try to be conservative here, and only ask for a 2GB heap.
   161		_MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*32
   162		_MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift
   163	
   164		_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
   165	
   166		// Max number of threads to run garbage collection.
   167		// 2, 3, and 4 are all plausible maximums depending
   168		// on the hardware details of the machine. The garbage
   169		// collector scales well to 32 cpus.
   170		_MaxGcproc = 32
   171	)
   172	
   173	const _MaxArena32 = 1<<32 - 1
   174	
   175	// OS-defined helpers:
   176	//
   177	// sysAlloc obtains a large chunk of zeroed memory from the
   178	// operating system, typically on the order of a hundred kilobytes
   179	// or a megabyte.
   180	// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
   181	// may use larger alignment, so the caller must be careful to realign the
   182	// memory obtained by sysAlloc.
   183	//
   184	// SysUnused notifies the operating system that the contents
   185	// of the memory region are no longer needed and can be reused
   186	// for other purposes.
   187	// SysUsed notifies the operating system that the contents
   188	// of the memory region are needed again.
   189	//
   190	// SysFree returns memory to the operating system unconditionally;
   191	// this is only used if an out-of-memory error has been detected midway through
   192	// an allocation. It is okay if SysFree is a no-op.
   193	//
   194	// SysReserve reserves address space without allocating memory.
   195	// If the pointer passed to it is non-nil, the caller wants the
   196	// reservation there, but SysReserve can still choose another
   197	// location if that one is unavailable. On some systems and in some
   198	// cases SysReserve will simply check that the address space is
   199	// available and not actually reserve it. If SysReserve returns
   200	// non-nil, it sets *reserved to true if the address space is
   201	// reserved, false if it has merely been checked.
   202	// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
   203	// may use larger alignment, so the caller must be careful to realign the
   204	// memory obtained by SysReserve.
   205	//
   206	// SysMap maps previously reserved address space for use.
   207	// The reserved argument is true if the address space was really
   208	// reserved, not merely checked.
   209	//
   210	// SysFault marks an (already sysAlloc'd) region to fault
   211	// if accessed. Used only for debugging the runtime.
   212	
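The reserve/map split described above can be illustrated with a standalone, Linux-only sketch using the plain syscall package (this is not the runtime's actual SysReserve/SysMap code): address space is first mapped PROT_NONE, and a page is committed for use later with mprotect.

	package main

	import (
		"fmt"
		"syscall"
	)

	func main() {
		const size = 1 << 20 // reserve 1 MB of address space

		// "Reserve": map anonymous memory with no access rights. The kernel
		// hands back address space, but touching it would fault.
		mem, err := syscall.Mmap(-1, 0, size,
			syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE)
		if err != nil {
			panic(err)
		}
		defer syscall.Munmap(mem)

		// "Map": commit the first page for read/write use.
		if err := syscall.Mprotect(mem[:4096], syscall.PROT_READ|syscall.PROT_WRITE); err != nil {
			panic(err)
		}
		mem[0] = 1 // now safe to touch
		fmt.Println("committed first page, mem[0] =", mem[0])
	}
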
   213	func mallocinit() {
   214		initSizes()
   215	
   216		if class_to_size[_TinySizeClass] != _TinySize {
   217			throw("bad TinySizeClass")
   218		}
   219	
   220		var p, bitmapSize, spansSize, pSize, limit uintptr
   221		var reserved bool
   222	
   223		// limit = runtime.memlimit();
   224		// See https://golang.org/issue/5049
   225		// TODO(rsc): Fix after 1.1.
   226		limit = 0
   227	
   228		// Set up the allocation arena, a contiguous area of memory where
   229		// allocated data will be found. The arena begins with a bitmap large
   230		// enough to hold 2 bits per allocated word.
   231		if sys.PtrSize == 8 && (limit == 0 || limit > 1<<30) {
   232			// On a 64-bit machine, allocate from a single contiguous reservation.
   233			// 512 GB (MaxMem) should be big enough for now.
   234			//
   235			// The code will work with the reservation at any address, but ask
   236			// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
   237			// Allocating a 512 GB region takes away 39 bits, and the amd64
   238			// doesn't let us choose the top 17 bits, so that leaves the 9 bits
   239			// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
   240	// that the valid memory addresses will begin with 0x00c0, 0x00c1, ..., 0x00df.
   241			// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
   242			// UTF-8 sequences, and they are otherwise as far away from
   243			// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
   244			// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
   245			// on OS X during thread allocations.  0x00c0 causes conflicts with
   246			// AddressSanitizer which reserves all memory up to 0x0100.
   247			// These choices are both for debuggability and to reduce the
   248			// odds of a conservative garbage collector (as is still used in gccgo)
   249			// not collecting memory because some non-pointer block of memory
   250			// had a bit pattern that matched a memory address.
   251			//
   252			// Actually we reserve 544 GB (because the bitmap ends up being 32 GB)
   253			// but it hardly matters: e0 00 is not valid UTF-8 either.
   254			//
   255	// If this fails, we fall back to the 32-bit memory mechanism.
   256			//
   257			// However, on arm64, we ignore all this advice above and slam the
   258			// allocation at 0x40 << 32 because when using 4k pages with 3-level
   259	// translation buffers, the user address space is limited to 39 bits.
   260			// On darwin/arm64, the address space is even smaller.
   261			arenaSize := round(_MaxMem, _PageSize)
   262			bitmapSize = arenaSize / (sys.PtrSize * 8 / 2)
   263			spansSize = arenaSize / _PageSize * sys.PtrSize
   264			spansSize = round(spansSize, _PageSize)
   265			for i := 0; i <= 0x7f; i++ {
   266				switch {
   267				case GOARCH == "arm64" && GOOS == "darwin":
   268					p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
   269				case GOARCH == "arm64":
   270					p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
   271				default:
   272					p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
   273				}
   274				pSize = bitmapSize + spansSize + arenaSize + _PageSize
   275				p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
   276				if p != 0 {
   277					break
   278				}
   279			}
   280		}
   281	
   282		if p == 0 {
   283			// On a 32-bit machine, we can't typically get away
   284			// with a giant virtual address space reservation.
   285			// Instead we map the memory information bitmap
   286			// immediately after the data segment, large enough
   287			// to handle the entire 4GB address space (256 MB),
   288			// along with a reservation for an initial arena.
   289			// When that gets used up, we'll start asking the kernel
   290			// for any memory anywhere.
   291	
   292			// If we fail to allocate, try again with a smaller arena.
   293			// This is necessary on Android L where we share a process
   294			// with ART, which reserves virtual memory aggressively.
   295			// In the worst case, fall back to a 0-sized initial arena,
   296			// in the hope that subsequent reservations will succeed.
   297			arenaSizes := []uintptr{
   298				512 << 20,
   299				256 << 20,
   300				128 << 20,
   301				0,
   302			}
   303	
   304			for _, arenaSize := range arenaSizes {
   305				bitmapSize = (_MaxArena32 + 1) / (sys.PtrSize * 8 / 2)
   306				spansSize = (_MaxArena32 + 1) / _PageSize * sys.PtrSize
   307				if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
   308					bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
   309					arenaSize = bitmapSize * 8
   310					spansSize = arenaSize / _PageSize * sys.PtrSize
   311				}
   312				spansSize = round(spansSize, _PageSize)
   313	
   314				// SysReserve treats the address we ask for, end, as a hint,
   315				// not as an absolute requirement. If we ask for the end
   316				// of the data segment but the operating system requires
   317				// a little more space before we can start allocating, it will
   318				// give out a slightly higher pointer. Except QEMU, which
   319				// is buggy, as usual: it won't adjust the pointer upward.
   320				// So adjust it upward a little bit ourselves: 1/4 MB to get
   321				// away from the running binary image and then round up
   322				// to a MB boundary.
   323				p = round(firstmoduledata.end+(1<<18), 1<<20)
   324				pSize = bitmapSize + spansSize + arenaSize + _PageSize
   325				p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
   326				if p != 0 {
   327					break
   328				}
   329			}
   330			if p == 0 {
   331				throw("runtime: cannot reserve arena virtual address space")
   332			}
   333		}
   334	
   335	// PageSize can be larger than the OS definition of page size,
   336		// so SysReserve can give us a PageSize-unaligned pointer.
   337		// To overcome this we ask for PageSize more and round up the pointer.
   338		p1 := round(p, _PageSize)
   339	
   340		mheap_.spans = (**mspan)(unsafe.Pointer(p1))
   341		mheap_.bitmap = p1 + spansSize + bitmapSize
   342		if sys.PtrSize == 4 {
   343			// Set arena_start such that we can accept memory
   344			// reservations located anywhere in the 4GB virtual space.
   345			mheap_.arena_start = 0
   346		} else {
   347			mheap_.arena_start = p1 + (spansSize + bitmapSize)
   348		}
   349		mheap_.arena_end = p + pSize
   350		mheap_.arena_used = p1 + (spansSize + bitmapSize)
   351		mheap_.arena_reserved = reserved
   352	
   353		if mheap_.arena_start&(_PageSize-1) != 0 {
   354			println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
   355			throw("misrounded allocation in mallocinit")
   356		}
   357	
   358		// Initialize the rest of the allocator.
   359		mheap_.init(spansSize)
   360		_g_ := getg()
   361		_g_.m.mcache = allocmcache()
   362	}
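
For reference, a standalone sketch (not part of malloc.go) that only prints the 64-bit arena hints the default case of the loop in mallocinit tries: 0x00c000000000, 0x01c000000000, ..., 0x7fc000000000.

	package main

	import "fmt"

	func main() {
		// Same expression as the default case above, assuming a 64-bit address space.
		for _, i := range []uint64{0, 1, 2, 0x7f} {
			p := i<<40 | 0x00c0<<32
			fmt.Printf("%#014x\n", p)
		}
	}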
   363	
   364	// sysAlloc allocates the next n bytes from the heap arena. The
   365	// returned pointer is always _PageSize aligned and between
   366	// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
   367	// There is no corresponding free function.
   368	func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
   369		if n > h.arena_end-h.arena_used {
   370	// We are in 32-bit mode; maybe we haven't used all of the possible address space yet.
   371			// Reserve some more space.
   372			p_size := round(n+_PageSize, 256<<20)
   373			new_end := h.arena_end + p_size // Careful: can overflow
   374			if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxArena32 {
   375				// TODO: It would be bad if part of the arena
   376				// is reserved and part is not.
   377				var reserved bool
   378				p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
   379				if p == 0 {
   380					return nil
   381				}
   382				if p == h.arena_end {
   383					h.arena_end = new_end
   384					h.arena_reserved = reserved
   385				} else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxArena32 {
   386					// Keep everything page-aligned.
   387					// Our pages are bigger than hardware pages.
   388					h.arena_end = p + p_size
   389					used := p + (-p & (_PageSize - 1))
   390					h.mapBits(used)
   391					h.mapSpans(used)
   392					h.arena_used = used
   393					h.arena_reserved = reserved
   394				} else {
   395					// We haven't added this allocation to
   396					// the stats, so subtract it from a
   397					// fake stat (but avoid underflow).
   398					stat := uint64(p_size)
   399					sysFree(unsafe.Pointer(p), p_size, &stat)
   400				}
   401			}
   402		}
   403	
   404		if n <= h.arena_end-h.arena_used {
   405			// Keep taking from our reservation.
   406			p := h.arena_used
   407			sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys)
   408			h.mapBits(p + n)
   409			h.mapSpans(p + n)
   410			h.arena_used = p + n
   411			if raceenabled {
   412				racemapshadow(unsafe.Pointer(p), n)
   413			}
   414	
   415			if p&(_PageSize-1) != 0 {
   416				throw("misrounded allocation in MHeap_SysAlloc")
   417			}
   418			return unsafe.Pointer(p)
   419		}
   420	
   421		// If using 64-bit, our reservation is all we have.
   422		if h.arena_end-h.arena_start > _MaxArena32 {
   423			return nil
   424		}
   425	
   426		// On 32-bit, once the reservation is gone we can
   427		// try to get memory at a location chosen by the OS.
   428		p_size := round(n, _PageSize) + _PageSize
   429		p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
   430		if p == 0 {
   431			return nil
   432		}
   433	
   434		if p < h.arena_start || p+p_size-h.arena_start > _MaxArena32 {
   435			top := ^uintptr(0)
   436			if top-h.arena_start-1 > _MaxArena32 {
   437				top = h.arena_start + _MaxArena32 + 1
   438			}
   439			print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n")
   440			sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
   441			return nil
   442		}
   443	
   444		p_end := p + p_size
   445		p += -p & (_PageSize - 1)
   446		if p+n > h.arena_used {
   447			h.mapBits(p + n)
   448			h.mapSpans(p + n)
   449			h.arena_used = p + n
   450			if p_end > h.arena_end {
   451				h.arena_end = p_end
   452			}
   453			if raceenabled {
   454				racemapshadow(unsafe.Pointer(p), n)
   455			}
   456		}
   457	
   458		if p&(_PageSize-1) != 0 {
   459			throw("misrounded allocation in MHeap_SysAlloc")
   460		}
   461		return unsafe.Pointer(p)
   462	}
   463	
   464	// base address for all 0-byte allocations
   465	var zerobase uintptr
   466	
   467	// nextFreeFast returns the next free object if one is quickly available.
   468	// Otherwise it returns 0.
   469	func nextFreeFast(s *mspan) gclinkptr {
   470		theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
   471		if theBit < 64 {
   472			result := s.freeindex + uintptr(theBit)
   473			if result < s.nelems {
   474				freeidx := result + 1
   475				if freeidx%64 == 0 && freeidx != s.nelems {
   476					return 0
   477				}
   478				s.allocCache >>= (theBit + 1)
   479				s.freeindex = freeidx
   480				v := gclinkptr(result*s.elemsize + s.base())
   481				s.allocCount++
   482				return v
   483			}
   484		}
   485		return 0
   486	}
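
The allocCache trick can be seen in isolation in a standalone sketch (using math/bits.TrailingZeros64 in place of the runtime-internal sys.Ctz64): each set bit marks a free slot, and a trailing-zero count plus a shift walks the free slots in order.

	package main

	import (
		"fmt"
		"math/bits"
	)

	// nextFree returns the index (relative to the cache) of the lowest set bit
	// and the cache shifted past that bit, or -1 when no bits are set.
	func nextFree(cache uint64) (int, uint64) {
		if cache == 0 {
			return -1, 0
		}
		bit := bits.TrailingZeros64(cache)
		return bit, cache >> uint(bit+1)
	}

	func main() {
		cache := uint64(0xb4) // bits 2, 4, 5, 7 set: four free slots
		base := 0
		for {
			idx, rest := nextFree(cache)
			if idx < 0 {
				break
			}
			fmt.Println("free slot at index", base+idx) // prints 2, 4, 5, 7
			base += idx + 1
			cache = rest
		}
	}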
   487	
   488	// nextFree returns the next free object from the cached span if one is available.
   489	// Otherwise it refills the cache with a span that has an available object
   490	// and returns that object along with a flag indicating that this was a
   491	// heavyweight allocation. If it was, the caller must determine whether a new
   492	// GC cycle needs to be started or, if GC is already active, whether this
   493	// goroutine needs to assist the GC.
   494	func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
   495		s = c.alloc[sizeclass]
   496		shouldhelpgc = false
   497		freeIndex := s.nextFreeIndex()
   498		if freeIndex == s.nelems {
   499			// The span is full.
   500			if uintptr(s.allocCount) != s.nelems {
   501				println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
   502				throw("s.allocCount != s.nelems && freeIndex == s.nelems")
   503			}
   504			systemstack(func() {
   505				c.refill(int32(sizeclass))
   506			})
   507			shouldhelpgc = true
   508			s = c.alloc[sizeclass]
   509	
   510			freeIndex = s.nextFreeIndex()
   511		}
   512	
   513		if freeIndex >= s.nelems {
   514			throw("freeIndex is not valid")
   515		}
   516	
   517		v = gclinkptr(freeIndex*s.elemsize + s.base())
   518		s.allocCount++
   519		if uintptr(s.allocCount) > s.nelems {
   520			println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
   521			throw("s.allocCount > s.nelems")
   522		}
   523		return
   524	}
   525	
   526	// Allocate an object of size bytes.
   527	// Small objects are allocated from the per-P cache's free lists.
   528	// Large objects (> 32 kB) are allocated straight from the heap.
   529	func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
   530		if gcphase == _GCmarktermination {
   531			throw("mallocgc called with gcphase == _GCmarktermination")
   532		}
   533	
   534		if size == 0 {
   535			return unsafe.Pointer(&zerobase)
   536		}
   537	
   538		if debug.sbrk != 0 {
   539			align := uintptr(16)
   540			if typ != nil {
   541				align = uintptr(typ.align)
   542			}
   543			return persistentalloc(size, align, &memstats.other_sys)
   544		}
   545	
   546		// assistG is the G to charge for this allocation, or nil if
   547		// GC is not currently active.
   548		var assistG *g
   549		if gcBlackenEnabled != 0 {
   550			// Charge the current user G for this allocation.
   551			assistG = getg()
   552			if assistG.m.curg != nil {
   553				assistG = assistG.m.curg
   554			}
   555			// Charge the allocation against the G. We'll account
   556			// for internal fragmentation at the end of mallocgc.
   557			assistG.gcAssistBytes -= int64(size)
   558	
   559			if assistG.gcAssistBytes < 0 {
   560				// This G is in debt. Assist the GC to correct
   561				// this before allocating. This must happen
   562				// before disabling preemption.
   563				gcAssistAlloc(assistG)
   564			}
   565		}
   566	
   567		// Set mp.mallocing to keep from being preempted by GC.
   568		mp := acquirem()
   569		if mp.mallocing != 0 {
   570			throw("malloc deadlock")
   571		}
   572		if mp.gsignal == getg() {
   573			throw("malloc during signal")
   574		}
   575		mp.mallocing = 1
   576	
   577		shouldhelpgc := false
   578		dataSize := size
   579		c := gomcache()
   580		var x unsafe.Pointer
   581		noscan := typ == nil || typ.kind&kindNoPointers != 0
   582		if size <= maxSmallSize {
   583			if noscan && size < maxTinySize {
   584				// Tiny allocator.
   585				//
   586				// Tiny allocator combines several tiny allocation requests
   587				// into a single memory block. The resulting memory block
   588	// is freed when all subobjects are unreachable. The subobjects
   589	// must be noscan (have no pointers); this ensures that
   590	// the amount of potentially wasted memory is bounded.
   591	//
   592	// The size of the memory block used for combining (maxTinySize) is tunable.
   593	// The current setting is 16 bytes, which gives at most 2x worst-case memory
   594	// wastage (when all but one of the subobjects are unreachable).
   595	// 8 bytes would result in no wastage at all, but would provide fewer
   596	// opportunities for combining.
   597	// 32 bytes provides more opportunities for combining,
   598	// but can lead to 4x worst-case wastage.
   599	// The best-case saving is 8x regardless of block size.
   600	//
   601	// Objects obtained from the tiny allocator must not be freed explicitly,
   602	// so when an object will be freed explicitly, we ensure that
   603	// its size >= maxTinySize.
   604	//
   605	// SetFinalizer has a special case for objects potentially coming
   606	// from the tiny allocator; in that case it allows setting finalizers
   607	// for an inner byte of a memory block.
   608	//
   609	// The main targets of the tiny allocator are small strings and
   610	// standalone escaping variables. On a json benchmark
   611	// the allocator reduces the number of allocations by ~12% and
   612	// reduces the heap size by ~20%. (A sketch of this packing follows mallocgc.)
   613				off := c.tinyoffset
   614				// Align tiny pointer for required (conservative) alignment.
   615				if size&7 == 0 {
   616					off = round(off, 8)
   617				} else if size&3 == 0 {
   618					off = round(off, 4)
   619				} else if size&1 == 0 {
   620					off = round(off, 2)
   621				}
   622				if off+size <= maxTinySize && c.tiny != 0 {
   623					// The object fits into existing tiny block.
   624					x = unsafe.Pointer(c.tiny + off)
   625					c.tinyoffset = off + size
   626					c.local_tinyallocs++
   627					mp.mallocing = 0
   628					releasem(mp)
   629					return x
   630				}
   631				// Allocate a new maxTinySize block.
   632				span := c.alloc[tinySizeClass]
   633				v := nextFreeFast(span)
   634				if v == 0 {
   635					v, _, shouldhelpgc = c.nextFree(tinySizeClass)
   636				}
   637				x = unsafe.Pointer(v)
   638				(*[2]uint64)(x)[0] = 0
   639				(*[2]uint64)(x)[1] = 0
   640				// See if we need to replace the existing tiny block with the new one
   641				// based on amount of remaining free space.
   642				if size < c.tinyoffset || c.tiny == 0 {
   643					c.tiny = uintptr(x)
   644					c.tinyoffset = size
   645				}
   646				size = maxTinySize
   647			} else {
   648				var sizeclass int8
   649				if size <= 1024-8 {
   650					sizeclass = size_to_class8[(size+7)>>3]
   651				} else {
   652					sizeclass = size_to_class128[(size-1024+127)>>7]
   653				}
   654				size = uintptr(class_to_size[sizeclass])
   655				span := c.alloc[sizeclass]
   656				v := nextFreeFast(span)
   657				if v == 0 {
   658					v, span, shouldhelpgc = c.nextFree(sizeclass)
   659				}
   660				x = unsafe.Pointer(v)
   661				if needzero && span.needzero != 0 {
   662					memclr(unsafe.Pointer(v), size)
   663				}
   664			}
   665		} else {
   666			var s *mspan
   667			shouldhelpgc = true
   668			systemstack(func() {
   669				s = largeAlloc(size, needzero)
   670			})
   671			s.freeindex = 1
   672			s.allocCount = 1
   673			x = unsafe.Pointer(s.base())
   674			size = s.elemsize
   675		}
   676	
   677		var scanSize uintptr
   678		if noscan {
   679			heapBitsSetTypeNoScan(uintptr(x))
   680		} else {
   681			// If allocating a defer+arg block, now that we've picked a malloc size
   682			// large enough to hold everything, cut the "asked for" size down to
   683			// just the defer header, so that the GC bitmap will record the arg block
   684			// as containing nothing at all (as if it were unused space at the end of
   685			// a malloc block caused by size rounding).
   686			// The defer arg areas are scanned as part of scanstack.
   687			if typ == deferType {
   688				dataSize = unsafe.Sizeof(_defer{})
   689			}
   690			heapBitsSetType(uintptr(x), size, dataSize, typ)
   691			if dataSize > typ.size {
   692				// Array allocation. If there are any
   693				// pointers, GC has to scan to the last
   694				// element.
   695				if typ.ptrdata != 0 {
   696					scanSize = dataSize - typ.size + typ.ptrdata
   697				}
   698			} else {
   699				scanSize = typ.ptrdata
   700			}
   701			c.local_scan += scanSize
   702		}
   703	
   704		// Ensure that the stores above that initialize x to
   705		// type-safe memory and set the heap bits occur before
   706		// the caller can make x observable to the garbage
   707		// collector. Otherwise, on weakly ordered machines,
   708		// the garbage collector could follow a pointer to x,
   709		// but see uninitialized memory or stale heap bits.
   710		publicationBarrier()
   711	
   712		// Allocate black during GC.
   713		// All slots hold nil so no scanning is needed.
   714		// This may be racing with GC so do it atomically if there can be
   715		// a race marking the bit.
   716		if gcphase != _GCoff {
   717			gcmarknewobject(uintptr(x), size, scanSize)
   718		}
   719	
   720		if raceenabled {
   721			racemalloc(x, size)
   722		}
   723	
   724		if msanenabled {
   725			msanmalloc(x, size)
   726		}
   727	
   728		mp.mallocing = 0
   729		releasem(mp)
   730	
   731		if debug.allocfreetrace != 0 {
   732			tracealloc(x, size, typ)
   733		}
   734	
   735		if rate := MemProfileRate; rate > 0 {
   736			if size < uintptr(rate) && int32(size) < c.next_sample {
   737				c.next_sample -= int32(size)
   738			} else {
   739				mp := acquirem()
   740				profilealloc(mp, x, size)
   741				releasem(mp)
   742			}
   743		}
   744	
   745		if assistG != nil {
   746			// Account for internal fragmentation in the assist
   747			// debt now that we know it.
   748			assistG.gcAssistBytes -= int64(size - dataSize)
   749		}
   750	
   751		if shouldhelpgc && gcShouldStart(false) {
   752			gcStart(gcBackgroundMode, false)
   753		}
   754	
   755		return x
   756	}
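
A standalone sketch of the tiny-allocator packing referenced above (hypothetical tinyFit helper, not runtime code), mirroring the alignment rounding in mallocgc: two small pointer-free objects share one 16-byte block until the next object no longer fits.

	package main

	import "fmt"

	const maxTinySize = 16 // same as _TinySize above

	// tinyFit aligns off the way mallocgc aligns c.tinyoffset and reports
	// whether an object of the given size still fits in the 16-byte block.
	func tinyFit(off, size uintptr) (uintptr, bool) {
		switch {
		case size&7 == 0:
			off = (off + 7) &^ 7
		case size&3 == 0:
			off = (off + 3) &^ 3
		case size&1 == 0:
			off = (off + 1) &^ 1
		}
		return off, off+size <= maxTinySize
	}

	func main() {
		off := uintptr(5)          // 5 bytes of the current block already used
		off, ok := tinyFit(off, 4) // a 4-byte object is rounded up to offset 8
		fmt.Println(off, ok)       // 8 true
		_, ok = tinyFit(off+4, 8)  // a further 8-byte object would not fit
		fmt.Println(ok)            // false
	}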
   757	
   758	func largeAlloc(size uintptr, needzero bool) *mspan {
   759		// print("largeAlloc size=", size, "\n")
   760	
   761		if size+_PageSize < size {
   762			throw("out of memory")
   763		}
   764		npages := size >> _PageShift
   765		if size&_PageMask != 0 {
   766			npages++
   767		}
   768	
   769		// Deduct credit for this span allocation and sweep if
   770	// necessary. mheap_.alloc will also sweep npages, so this only
   771	// pays the debt down to npages pages.
   772		deductSweepCredit(npages*_PageSize, npages)
   773	
   774		s := mheap_.alloc(npages, 0, true, needzero)
   775		if s == nil {
   776			throw("out of memory")
   777		}
   778		s.limit = s.base() + size
   779		heapBitsForSpan(s.base()).initSpan(s)
   780		return s
   781	}
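
The size-to-pages rounding in largeAlloc can be checked with a small standalone sketch; pageShift, pageSize, and pageMask simply copy the values of the constants defined earlier in this file.

	package main

	import "fmt"

	// These mirror _PageShift, _PageSize, and _PageMask above.
	const (
		pageShift = 13
		pageSize  = 1 << pageShift
		pageMask  = pageSize - 1
	)

	// pagesFor rounds an allocation size up to a whole number of pages,
	// the same way largeAlloc computes npages.
	func pagesFor(size uintptr) uintptr {
		npages := size >> pageShift
		if size&pageMask != 0 {
			npages++
		}
		return npages
	}

	func main() {
		fmt.Println(pagesFor(1))        // 1
		fmt.Println(pagesFor(8192))     // 1
		fmt.Println(pagesFor(8193))     // 2
		fmt.Println(pagesFor(40 << 10)) // 5
	}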
   782	
   783	// implementation of new builtin
   784	func newobject(typ *_type) unsafe.Pointer {
   785		return mallocgc(typ.size, typ, true)
   786	}
   787	
   788	//go:linkname reflect_unsafe_New reflect.unsafe_New
   789	func reflect_unsafe_New(typ *_type) unsafe.Pointer {
   790		return newobject(typ)
   791	}
   792	
   793	// newarray allocates an array of n elements of type typ.
   794	func newarray(typ *_type, n int) unsafe.Pointer {
   795		if n < 0 || uintptr(n) > maxSliceCap(typ.size) {
   796			panic(plainError("runtime: allocation size out of range"))
   797		}
   798		return mallocgc(typ.size*uintptr(n), typ, true)
   799	}
   800	
   801	//go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
   802	func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer {
   803		return newarray(typ, n)
   804	}
   805	
   806	func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
   807		mp.mcache.next_sample = nextSample()
   808		mProf_Malloc(x, size)
   809	}
   810	
   811	// nextSample returns the next sampling point for heap profiling.
   812	// It produces a random variable with a geometric distribution and
   813	// mean MemProfileRate. This is done by generating a uniformly
   814	// distributed random number and applying the inverse of the cumulative
   815	// distribution function for an exponential.
   816	func nextSample() int32 {
   817		if GOOS == "plan9" {
   818			// Plan 9 doesn't support floating point in note handler.
   819			if g := getg(); g == g.m.gsignal {
   820				return nextSampleNoFP()
   821			}
   822		}
   823	
   824		period := MemProfileRate
   825	
   826		// make nextSample not overflow. Maximum possible step is
   827		// -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period.
   828		switch {
   829		case period > 0x7000000:
   830			period = 0x7000000
   831		case period == 0:
   832			return 0
   833		}
   834	
   835		// Let m be the sample rate,
   836	// the probability density function is m*exp(-mx), so the CDF is
   837		// p = 1 - exp(-mx), so
   838		// q = 1 - p == exp(-mx)
   839		// log_e(q) = -mx
   840		// -log_e(q)/m = x
   841		// x = -log_e(q) * period
   842		// x = log_2(q) * (-log_e(2)) * period    ; Using log_2 for efficiency
   843		const randomBitCount = 26
   844		q := fastrand1()%(1<<randomBitCount) + 1
   845		qlog := fastlog2(float64(q)) - randomBitCount
   846		if qlog > 0 {
   847			qlog = 0
   848		}
   849		const minusLog2 = -0.6931471805599453 // -ln(2)
   850		return int32(qlog*(minusLog2*float64(period))) + 1
   851	}
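
A standalone sketch of the same inverse-CDF draw, using math.Log instead of the runtime's fastlog2 approximation; the 512 KiB period here is an assumed example matching MemProfileRate's documented default. Averaging many draws should come out close to the period.

	package main

	import (
		"fmt"
		"math"
		"math/rand"
	)

	// nextSample draws an exponentially distributed sampling distance with the
	// given mean, by inverting the CDF p = 1 - exp(-x/period).
	func nextSample(period float64) float64 {
		q := rand.Float64() // uniform in [0, 1)
		if q == 0 {
			q = math.SmallestNonzeroFloat64
		}
		return -math.Log(q) * period
	}

	func main() {
		const period = 512 * 1024 // bytes between samples, on average
		const n = 100000
		var sum float64
		for i := 0; i < n; i++ {
			sum += nextSample(period)
		}
		fmt.Printf("empirical mean ~ %.0f (expected %d)\n", sum/n, period)
	}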
   852	
   853	// nextSampleNoFP is similar to nextSample, but uses older,
   854	// simpler code to avoid floating point.
   855	func nextSampleNoFP() int32 {
   856		// Set first allocation sample size.
   857		rate := MemProfileRate
   858		if rate > 0x3fffffff { // make 2*rate not overflow
   859			rate = 0x3fffffff
   860		}
   861		if rate != 0 {
   862			return int32(int(fastrand1()) % (2 * rate))
   863		}
   864		return 0
   865	}
   866	
   867	type persistentAlloc struct {
   868		base unsafe.Pointer
   869		off  uintptr
   870	}
   871	
   872	var globalAlloc struct {
   873		mutex
   874		persistentAlloc
   875	}
   876	
   877	// Wrapper around sysAlloc that can allocate small chunks.
   878	// There is no associated free operation.
   879	// Intended for things like function/type/debug-related persistent data.
   880	// If align is 0, uses default align (currently 8).
   881	func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
   882		var p unsafe.Pointer
   883		systemstack(func() {
   884			p = persistentalloc1(size, align, sysStat)
   885		})
   886		return p
   887	}
   888	
   889	// Must run on system stack because stack growth can (re)invoke it.
   890	// See issue 9174.
   891	//go:systemstack
   892	func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer {
   893		const (
   894			chunk    = 256 << 10
   895			maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
   896		)
   897	
   898		if size == 0 {
   899			throw("persistentalloc: size == 0")
   900		}
   901		if align != 0 {
   902			if align&(align-1) != 0 {
   903				throw("persistentalloc: align is not a power of 2")
   904			}
   905			if align > _PageSize {
   906				throw("persistentalloc: align is too large")
   907			}
   908		} else {
   909			align = 8
   910		}
   911	
   912		if size >= maxBlock {
   913			return sysAlloc(size, sysStat)
   914		}
   915	
   916		mp := acquirem()
   917		var persistent *persistentAlloc
   918		if mp != nil && mp.p != 0 {
   919			persistent = &mp.p.ptr().palloc
   920		} else {
   921			lock(&globalAlloc.mutex)
   922			persistent = &globalAlloc.persistentAlloc
   923		}
   924		persistent.off = round(persistent.off, align)
   925		if persistent.off+size > chunk || persistent.base == nil {
   926			persistent.base = sysAlloc(chunk, &memstats.other_sys)
   927			if persistent.base == nil {
   928				if persistent == &globalAlloc.persistentAlloc {
   929					unlock(&globalAlloc.mutex)
   930				}
   931				throw("runtime: cannot allocate memory")
   932			}
   933			persistent.off = 0
   934		}
   935		p := add(persistent.base, persistent.off)
   936		persistent.off += size
   937		releasem(mp)
   938		if persistent == &globalAlloc.persistentAlloc {
   939			unlock(&globalAlloc.mutex)
   940		}
   941	
   942		if sysStat != &memstats.other_sys {
   943			mSysStatInc(sysStat, size)
   944			mSysStatDec(&memstats.other_sys, size)
   945		}
   946		return p
   947	}
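
The chunked bump-pointer scheme in persistentalloc1 can be sketched as a standalone program, with a plain byte slice standing in for the sysAlloc'd chunk (illustrative only: it assumes size < chunkSize and a power-of-two align, which the real code enforces).

	package main

	import "fmt"

	const chunkSize = 256 << 10 // mirrors the 256 KB chunk above

	type bumpAlloc struct {
		chunk []byte
		off   uintptr
	}

	// alloc rounds the offset up to align (a power of two), grabs a fresh
	// chunk when the current one cannot hold size more bytes, and then
	// simply bumps the offset. There is no free operation.
	func (b *bumpAlloc) alloc(size, align uintptr) []byte {
		b.off = (b.off + align - 1) &^ (align - 1)
		if b.chunk == nil || b.off+size > uintptr(len(b.chunk)) {
			b.chunk = make([]byte, chunkSize)
			b.off = 0
		}
		p := b.chunk[b.off : b.off+size]
		b.off += size
		return p
	}

	func main() {
		var b bumpAlloc
		fmt.Println(len(b.alloc(40, 8)), len(b.alloc(100, 16))) // 40 100
	}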
   948	
