cpuprof.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // CPU profiling.
     6  //
     7  // The signal handler for the profiling clock tick adds a new stack trace
     8  // to a log of recent traces. The log is read by a user goroutine that
     9  // turns it into formatted profile data. If the reader does not keep up
    10  // with the log, those writes will be recorded as a count of lost records.
    11  // The actual profile buffer is in profbuf.go.
    12  
    13  package runtime
    14  
    15  import (
    16  	"internal/abi"
    17  	"runtime/internal/sys"
    18  	"unsafe"
    19  )
    20  
    21  const (
    22  	maxCPUProfStack = 64
    23  
    24  	// profBufWordCount is the size of the CPU profile buffer's storage for the
    25  	// header and stack of each sample, measured in 64-bit words. Every sample
    26  	// has a required header of two words. With a small additional header (a
    27  	// word or two) and stacks at the profiler's maximum length of 64 frames,
    28  	// that capacity can support 1900 samples or 19 thread-seconds at a 100 Hz
    29  	// sample rate, at a cost of 1 MiB.
    30  	profBufWordCount = 1 << 17
    31  	// profBufTagCount is the size of the CPU profile buffer's storage for the
    32  	// goroutine tags associated with each sample. A capacity of 1<<14 means
    33  	// room for 16k samples, or 160 thread-seconds at a 100 Hz sample rate.
    34  	profBufTagCount = 1 << 14
    35  )
    36  
    37  type cpuProfile struct {
    38  	lock mutex
    39  	on   bool     // profiling is on
    40  	log  *profBuf // profile events written here
    41  
    42  	// extra holds extra stacks accumulated in addNonGo
    43  	// corresponding to profiling signals arriving on
    44  	// non-Go-created threads. Those stacks are written
    45  	// to log the next time a normal Go thread gets the
    46  	// signal handler.
    47  	// Assuming the stacks are 2 words each (we don't get
    48  	// a full traceback from those threads), plus one word
    49  	// size for framing, 100 Hz profiling would generate
    50  	// 300 words per second.
    51  	// Hopefully a normal Go thread will get the profiling
    52  	// signal at least once every few seconds.
    53  	extra      [1000]uintptr
    54  	numExtra   int
    55  	lostExtra  uint64 // count of frames lost because extra is full
    56  	lostAtomic uint64 // count of frames lost because of being in atomic64 on mips/arm; updated racily
    57  }
    58  
    59  var cpuprof cpuProfile
    60  
    61  // SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
    62  // If hz <= 0, SetCPUProfileRate turns off profiling.
    63  // If the profiler is on, the rate cannot be changed without first turning it off.
    64  //
    65  // Most clients should use the [runtime/pprof] package or
    66  // the [testing] package's -test.cpuprofile flag instead of calling
    67  // SetCPUProfileRate directly.
    68  func SetCPUProfileRate(hz int) {
    69  	// Clamp hz to something reasonable.
    70  	if hz < 0 {
    71  		hz = 0
    72  	}
    73  	if hz > 1000000 {
    74  		hz = 1000000
    75  	}
    76  
    77  	lock(&cpuprof.lock)
    78  	if hz > 0 {
    79  		if cpuprof.on || cpuprof.log != nil {
    80  			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
    81  			unlock(&cpuprof.lock)
    82  			return
    83  		}
    84  
    85  		cpuprof.on = true
    86  		cpuprof.log = newProfBuf(1, profBufWordCount, profBufTagCount)
    87  		hdr := [1]uint64{uint64(hz)}
    88  		cpuprof.log.write(nil, nanotime(), hdr[:], nil)
    89  		setcpuprofilerate(int32(hz))
    90  	} else if cpuprof.on {
    91  		setcpuprofilerate(0)
    92  		cpuprof.on = false
    93  		cpuprof.addExtra()
    94  		cpuprof.log.close()
    95  	}
    96  	unlock(&cpuprof.lock)
    97  }
    98  
    99  // add adds the stack trace to the profile.
   100  // It is called from signal handlers and other limited environments
   101  // and cannot allocate memory or acquire locks that might be
   102  // held at the time of the signal, nor can it use substantial amounts
   103  // of stack.
   104  //
   105  //go:nowritebarrierrec
   106  func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
   107  	// Simple cas-lock to coordinate with setcpuprofilerate.
   108  	for !prof.signalLock.CompareAndSwap(0, 1) {
   109  		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
   110  		osyield()
   111  	}
   112  
   113  	if prof.hz.Load() != 0 { // implies cpuprof.log != nil
   114  		if p.numExtra > 0 || p.lostExtra > 0 || p.lostAtomic > 0 {
   115  			p.addExtra()
   116  		}
   117  		hdr := [1]uint64{1}
   118  		// Note: write "knows" that the argument is &gp.labels,
   119  		// because otherwise its write barrier behavior may not
   120  		// be correct. See the long comment there before
   121  		// changing the argument here.
   122  		cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
   123  	}
   124  
   125  	prof.signalLock.Store(0)
   126  }
   127  
   128  // addNonGo adds the non-Go stack trace to the profile.
   129  // It is called from a non-Go thread, so we cannot use much stack at all,
   130  // nor do anything that needs a g or an m.
   131  // In particular, we can't call cpuprof.log.write.
   132  // Instead, we copy the stack into cpuprof.extra,
   133  // which will be drained the next time a Go thread
   134  // gets the signal handling event.
   135  //
   136  //go:nosplit
   137  //go:nowritebarrierrec
   138  func (p *cpuProfile) addNonGo(stk []uintptr) {
   139  	// Simple cas-lock to coordinate with SetCPUProfileRate.
   140  	// (Other calls to add or addNonGo should be blocked out
   141  	// by the fact that only one SIGPROF can be handled by the
   142  	// process at a time. If not, this lock will serialize those too.
   143  	// The use of timer_create(2) on Linux to request process-targeted
   144  	// signals may have changed this.)
   145  	for !prof.signalLock.CompareAndSwap(0, 1) {
   146  		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
   147  		osyield()
   148  	}
   149  
   150  	if cpuprof.numExtra+1+len(stk) < len(cpuprof.extra) {
   151  		i := cpuprof.numExtra
   152  		cpuprof.extra[i] = uintptr(1 + len(stk))
   153  		copy(cpuprof.extra[i+1:], stk)
   154  		cpuprof.numExtra += 1 + len(stk)
   155  	} else {
   156  		cpuprof.lostExtra++
   157  	}
   158  
   159  	prof.signalLock.Store(0)
   160  }
   161  
   162  // addExtra adds the "extra" profiling events,
   163  // queued by addNonGo, to the profile log.
   164  // addExtra is called either from a signal handler on a Go thread
   165  // or from an ordinary goroutine; either way it can use stack
   166  // and has a g. The world may be stopped, though.
   167  func (p *cpuProfile) addExtra() {
   168  	// Copy accumulated non-Go profile events.
   169  	hdr := [1]uint64{1}
   170  	for i := 0; i < p.numExtra; {
   171  		p.log.write(nil, 0, hdr[:], p.extra[i+1:i+int(p.extra[i])])
   172  		i += int(p.extra[i])
   173  	}
   174  	p.numExtra = 0
   175  
   176  	// Report any lost events.
   177  	if p.lostExtra > 0 {
   178  		hdr := [1]uint64{p.lostExtra}
   179  		lostStk := [2]uintptr{
   180  			abi.FuncPCABIInternal(_LostExternalCode) + sys.PCQuantum,
   181  			abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
   182  		}
   183  		p.log.write(nil, 0, hdr[:], lostStk[:])
   184  		p.lostExtra = 0
   185  	}
   186  
   187  	if p.lostAtomic > 0 {
   188  		hdr := [1]uint64{p.lostAtomic}
   189  		lostStk := [2]uintptr{
   190  			abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
   191  			abi.FuncPCABIInternal(_System) + sys.PCQuantum,
   192  		}
   193  		p.log.write(nil, 0, hdr[:], lostStk[:])
   194  		p.lostAtomic = 0
   195  	}
   196  
   197  }
   198  
   199  // CPUProfile panics.
   200  // It formerly provided raw access to chunks of
   201  // a pprof-format profile generated by the runtime.
   202  // The details of generating that format have changed,
   203  // so this functionality has been removed.
   204  //
   205  // Deprecated: Use the [runtime/pprof] package,
   206  // or the handlers in the [net/http/pprof] package,
   207  // or the [testing] package's -test.cpuprofile flag instead.
   208  func CPUProfile() []byte {
   209  	panic("CPUProfile no longer available")
   210  }
   211  
   212  //go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
   213  func runtime_pprof_runtime_cyclesPerSecond() int64 {
   214  	return ticksPerSecond()
   215  }
   216  
   217  // readProfile, provided to runtime/pprof, returns the next chunk of
   218  // binary CPU profiling stack trace data, blocking until data is available.
   219  // If profiling is turned off and all the profile data accumulated while it was
   220  // on has been returned, readProfile returns eof=true.
   221  // The caller must save the returned data and tags before calling readProfile again.
   222  // The returned data contains a whole number of records, and tags contains
   223  // exactly one entry per record.
   224  //
   225  //go:linkname runtime_pprof_readProfile runtime/pprof.readProfile
   226  func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) {
   227  	lock(&cpuprof.lock)
   228  	log := cpuprof.log
   229  	unlock(&cpuprof.lock)
   230  	readMode := profBufBlocking
   231  	if GOOS == "darwin" || GOOS == "ios" {
   232  		readMode = profBufNonBlocking // For #61768; on Darwin notes are not async-signal-safe.  See sigNoteSetup in os_darwin.go.
   233  	}
   234  	data, tags, eof := log.read(readMode)
   235  	if len(data) == 0 && eof {
   236  		lock(&cpuprof.lock)
   237  		cpuprof.log = nil
   238  		unlock(&cpuprof.lock)
   239  	}
   240  	return data, tags, eof
   241  }
   242
View as plain text