...
Run Format

Source file src/testing/benchmark.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package testing
     6	
     7	import (
     8		"flag"
     9		"fmt"
    10		"internal/race"
    11		"os"
    12		"runtime"
    13		"sync"
    14		"sync/atomic"
    15		"time"
    16	)
    17	
    18	var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
    19	var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`")
    20	var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
    21	
    22	// Global lock to ensure only one benchmark runs at a time.
    23	var benchmarkLock sync.Mutex
    24	
    25	// Used for every benchmark for measuring memory.
    26	var memStats runtime.MemStats
    27	
    28	// An internal type but exported because it is cross-package; part of the implementation
    29	// of the "go test" command.
    30	type InternalBenchmark struct {
    31		Name string
    32		F    func(b *B)
    33	}
    34	
    35	// B is a type passed to Benchmark functions to manage benchmark
    36	// timing and to specify the number of iterations to run.
    37	//
    38	// A benchmark ends when its Benchmark function returns or calls any of the methods
    39	// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
    40	// only from the goroutine running the Benchmark function.
    41	// The other reporting methods, such as the variations of Log and Error,
    42	// may be called simultaneously from multiple goroutines.
    43	//
    44	// Like in tests, benchmark logs are accumulated during execution
    45	// and dumped to standard error when done. Unlike in tests, benchmark logs
    46	// are always printed, so as not to hide output whose existence may be
    47	// affecting benchmark results.
    48	type B struct {
    49		common
    50		context          *benchContext
    51		N                int
    52		previousN        int           // number of iterations in the previous run
    53		previousDuration time.Duration // total duration of the previous run
    54		benchFunc        func(b *B)
    55		benchTime        time.Duration
    56		bytes            int64
    57		missingBytes     bool // one of the subbenchmarks does not have bytes set.
    58		timerOn          bool
    59		showAllocResult  bool
    60		result           BenchmarkResult
    61		parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
    62		// The initial states of memStats.Mallocs and memStats.TotalAlloc.
    63		startAllocs uint64
    64		startBytes  uint64
    65		// The net total of this test after being run.
    66		netAllocs uint64
    67		netBytes  uint64
    68	}
    69	
    70	// StartTimer starts timing a test. This function is called automatically
    71	// before a benchmark starts, but it can also used to resume timing after
    72	// a call to StopTimer.
    73	func (b *B) StartTimer() {
    74		if !b.timerOn {
    75			runtime.ReadMemStats(&memStats)
    76			b.startAllocs = memStats.Mallocs
    77			b.startBytes = memStats.TotalAlloc
    78			b.start = time.Now()
    79			b.timerOn = true
    80		}
    81	}
    82	
    83	// StopTimer stops timing a test. This can be used to pause the timer
    84	// while performing complex initialization that you don't
    85	// want to measure.
    86	func (b *B) StopTimer() {
    87		if b.timerOn {
    88			b.duration += time.Now().Sub(b.start)
    89			runtime.ReadMemStats(&memStats)
    90			b.netAllocs += memStats.Mallocs - b.startAllocs
    91			b.netBytes += memStats.TotalAlloc - b.startBytes
    92			b.timerOn = false
    93		}
    94	}
    95	
    96	// ResetTimer zeros the elapsed benchmark time and memory allocation counters.
    97	// It does not affect whether the timer is running.
    98	func (b *B) ResetTimer() {
    99		if b.timerOn {
   100			runtime.ReadMemStats(&memStats)
   101			b.startAllocs = memStats.Mallocs
   102			b.startBytes = memStats.TotalAlloc
   103			b.start = time.Now()
   104		}
   105		b.duration = 0
   106		b.netAllocs = 0
   107		b.netBytes = 0
   108	}
   109	
   110	// SetBytes records the number of bytes processed in a single operation.
   111	// If this is called, the benchmark will report ns/op and MB/s.
   112	func (b *B) SetBytes(n int64) { b.bytes = n }
   113	
   114	// ReportAllocs enables malloc statistics for this benchmark.
   115	// It is equivalent to setting -test.benchmem, but it only affects the
   116	// benchmark function that calls ReportAllocs.
   117	func (b *B) ReportAllocs() {
   118		b.showAllocResult = true
   119	}
   120	
   121	func (b *B) nsPerOp() int64 {
   122		if b.N <= 0 {
   123			return 0
   124		}
   125		return b.duration.Nanoseconds() / int64(b.N)
   126	}
   127	
   128	// runN runs a single benchmark for the specified number of iterations.
   129	func (b *B) runN(n int) {
   130		benchmarkLock.Lock()
   131		defer benchmarkLock.Unlock()
   132		// Try to get a comparable environment for each run
   133		// by clearing garbage from previous runs.
   134		runtime.GC()
   135		b.raceErrors = -race.Errors()
   136		b.N = n
   137		b.parallelism = 1
   138		b.ResetTimer()
   139		b.StartTimer()
   140		b.benchFunc(b)
   141		b.StopTimer()
   142		b.previousN = n
   143		b.previousDuration = b.duration
   144		b.raceErrors += race.Errors()
   145		if b.raceErrors > 0 {
   146			b.Errorf("race detected during execution of benchmark")
   147		}
   148	}
   149	
   150	func min(x, y int) int {
   151		if x > y {
   152			return y
   153		}
   154		return x
   155	}
   156	
   157	func max(x, y int) int {
   158		if x < y {
   159			return y
   160		}
   161		return x
   162	}
   163	
   164	// roundDown10 rounds a number down to the nearest power of 10.
   165	func roundDown10(n int) int {
   166		var tens = 0
   167		// tens = floor(log_10(n))
   168		for n >= 10 {
   169			n = n / 10
   170			tens++
   171		}
   172		// result = 10^tens
   173		result := 1
   174		for i := 0; i < tens; i++ {
   175			result *= 10
   176		}
   177		return result
   178	}
   179	
   180	// roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX].
   181	func roundUp(n int) int {
   182		base := roundDown10(n)
   183		switch {
   184		case n <= base:
   185			return base
   186		case n <= (2 * base):
   187			return 2 * base
   188		case n <= (3 * base):
   189			return 3 * base
   190		case n <= (5 * base):
   191			return 5 * base
   192		default:
   193			return 10 * base
   194		}
   195	}
   196	
   197	// run1 runs the first iteration of benchFunc. It returns whether more
   198	// iterations of this benchmarks should be run.
   199	func (b *B) run1() bool {
   200		if ctx := b.context; ctx != nil {
   201			// Extend maxLen, if needed.
   202			if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
   203				ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
   204			}
   205		}
   206		go func() {
   207			// Signal that we're done whether we return normally
   208			// or by FailNow's runtime.Goexit.
   209			defer func() {
   210				b.signal <- true
   211			}()
   212	
   213			b.runN(1)
   214		}()
   215		<-b.signal
   216		if b.failed {
   217			fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
   218			return false
   219		}
   220		// Only print the output if we know we are not going to proceed.
   221		// Otherwise it is printed in processBench.
   222		if atomic.LoadInt32(&b.hasSub) != 0 || b.finished {
   223			tag := "BENCH"
   224			if b.skipped {
   225				tag = "SKIP"
   226			}
   227			if b.chatty && (len(b.output) > 0 || b.finished) {
   228				b.trimOutput()
   229				fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
   230			}
   231			return false
   232		}
   233		return true
   234	}
   235	
   236	// run executes the benchmark in a separate goroutine, including all of its
   237	// subbenchmarks. b must not have subbenchmarks.
   238	func (b *B) run() BenchmarkResult {
   239		if b.context != nil {
   240			// Running go test --test.bench
   241			b.context.processBench(b) // Must call doBench.
   242		} else {
   243			// Running func Benchmark.
   244			b.doBench()
   245		}
   246		return b.result
   247	}
   248	
   249	func (b *B) doBench() BenchmarkResult {
   250		go b.launch()
   251		<-b.signal
   252		return b.result
   253	}
   254	
   255	// launch launches the benchmark function. It gradually increases the number
   256	// of benchmark iterations until the benchmark runs for the requested benchtime.
   257	// launch is run by the doBench function as a separate goroutine.
   258	// run1 must have been called on b.
   259	func (b *B) launch() {
   260		// Signal that we're done whether we return normally
   261		// or by FailNow's runtime.Goexit.
   262		defer func() {
   263			b.signal <- true
   264		}()
   265	
   266		// Run the benchmark for at least the specified amount of time.
   267		d := b.benchTime
   268		for n := 1; !b.failed && b.duration < d && n < 1e9; {
   269			last := n
   270			// Predict required iterations.
   271			n = int(d.Nanoseconds())
   272			if nsop := b.nsPerOp(); nsop != 0 {
   273				n /= int(nsop)
   274			}
   275			// Run more iterations than we think we'll need (1.2x).
   276			// Don't grow too fast in case we had timing errors previously.
   277			// Be sure to run at least one more than last time.
   278			n = max(min(n+n/5, 100*last), last+1)
   279			// Round up to something easy to read.
   280			n = roundUp(n)
   281			b.runN(n)
   282		}
   283		b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes}
   284	}
   285	
   286	// The results of a benchmark run.
   287	type BenchmarkResult struct {
   288		N         int           // The number of iterations.
   289		T         time.Duration // The total time taken.
   290		Bytes     int64         // Bytes processed in one iteration.
   291		MemAllocs uint64        // The total number of memory allocations.
   292		MemBytes  uint64        // The total number of bytes allocated.
   293	}
   294	
   295	func (r BenchmarkResult) NsPerOp() int64 {
   296		if r.N <= 0 {
   297			return 0
   298		}
   299		return r.T.Nanoseconds() / int64(r.N)
   300	}
   301	
   302	func (r BenchmarkResult) mbPerSec() float64 {
   303		if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
   304			return 0
   305		}
   306		return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
   307	}
   308	
   309	func (r BenchmarkResult) AllocsPerOp() int64 {
   310		if r.N <= 0 {
   311			return 0
   312		}
   313		return int64(r.MemAllocs) / int64(r.N)
   314	}
   315	
   316	func (r BenchmarkResult) AllocedBytesPerOp() int64 {
   317		if r.N <= 0 {
   318			return 0
   319		}
   320		return int64(r.MemBytes) / int64(r.N)
   321	}
   322	
   323	func (r BenchmarkResult) String() string {
   324		mbs := r.mbPerSec()
   325		mb := ""
   326		if mbs != 0 {
   327			mb = fmt.Sprintf("\t%7.2f MB/s", mbs)
   328		}
   329		nsop := r.NsPerOp()
   330		ns := fmt.Sprintf("%10d ns/op", nsop)
   331		if r.N > 0 && nsop < 100 {
   332			// The format specifiers here make sure that
   333			// the ones digits line up for all three possible formats.
   334			if nsop < 10 {
   335				ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
   336			} else {
   337				ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
   338			}
   339		}
   340		return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
   341	}
   342	
   343	func (r BenchmarkResult) MemString() string {
   344		return fmt.Sprintf("%8d B/op\t%8d allocs/op",
   345			r.AllocedBytesPerOp(), r.AllocsPerOp())
   346	}
   347	
   348	// benchmarkName returns full name of benchmark including procs suffix.
   349	func benchmarkName(name string, n int) string {
   350		if n != 1 {
   351			return fmt.Sprintf("%s-%d", name, n)
   352		}
   353		return name
   354	}
   355	
   356	type benchContext struct {
   357		match *matcher
   358	
   359		maxLen int // The largest recorded benchmark name.
   360		extLen int // Maximum extension length.
   361	}
   362	
   363	// An internal function but exported because it is cross-package; part of the implementation
   364	// of the "go test" command.
   365	func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
   366		runBenchmarks(matchString, benchmarks)
   367	}
   368	
   369	func runBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
   370		// If no flag was specified, don't run benchmarks.
   371		if len(*matchBenchmarks) == 0 {
   372			return true
   373		}
   374		// Collect matching benchmarks and determine longest name.
   375		maxprocs := 1
   376		for _, procs := range cpuList {
   377			if procs > maxprocs {
   378				maxprocs = procs
   379			}
   380		}
   381		ctx := &benchContext{
   382			match:  newMatcher(matchString, *matchBenchmarks, "-test.bench"),
   383			extLen: len(benchmarkName("", maxprocs)),
   384		}
   385		var bs []InternalBenchmark
   386		for _, Benchmark := range benchmarks {
   387			if _, matched := ctx.match.fullName(nil, Benchmark.Name); matched {
   388				bs = append(bs, Benchmark)
   389				benchName := benchmarkName(Benchmark.Name, maxprocs)
   390				if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
   391					ctx.maxLen = l
   392				}
   393			}
   394		}
   395		main := &B{
   396			common: common{
   397				name:   "Main",
   398				w:      os.Stdout,
   399				chatty: *chatty,
   400			},
   401			benchFunc: func(b *B) {
   402				for _, Benchmark := range bs {
   403					b.Run(Benchmark.Name, Benchmark.F)
   404				}
   405			},
   406			benchTime: *benchTime,
   407			context:   ctx,
   408		}
   409		main.runN(1)
   410		return !main.failed
   411	}
   412	
   413	// processBench runs bench b for the configured CPU counts and prints the results.
   414	func (ctx *benchContext) processBench(b *B) {
   415		for i, procs := range cpuList {
   416			runtime.GOMAXPROCS(procs)
   417			benchName := benchmarkName(b.name, procs)
   418			fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
   419			// Recompute the running time for all but the first iteration.
   420			if i > 0 {
   421				b = &B{
   422					common: common{
   423						signal: make(chan bool),
   424						name:   b.name,
   425						w:      b.w,
   426						chatty: b.chatty,
   427					},
   428					benchFunc: b.benchFunc,
   429					benchTime: b.benchTime,
   430				}
   431				b.run1()
   432			}
   433			r := b.doBench()
   434			if b.failed {
   435				// The output could be very long here, but probably isn't.
   436				// We print it all, regardless, because we don't want to trim the reason
   437				// the benchmark failed.
   438				fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
   439				continue
   440			}
   441			results := r.String()
   442			if *benchmarkMemory || b.showAllocResult {
   443				results += "\t" + r.MemString()
   444			}
   445			fmt.Fprintln(b.w, results)
   446			// Unlike with tests, we ignore the -chatty flag and always print output for
   447			// benchmarks since the output generation time will skew the results.
   448			if len(b.output) > 0 {
   449				b.trimOutput()
   450				fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
   451			}
   452			if p := runtime.GOMAXPROCS(-1); p != procs {
   453				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
   454			}
   455		}
   456	}
   457	
   458	// Run benchmarks f as a subbenchmark with the given name. It reports
   459	// whether there were any failures.
   460	//
   461	// A subbenchmark is like any other benchmark. A benchmark that calls Run at
   462	// least once will not be measured itself and will be called once with N=1.
   463	//
   464	// Run may be called simultaneously from multiple goroutines, but all such
   465	// calls must happen before the outer benchmark function for b returns.
   466	func (b *B) Run(name string, f func(b *B)) bool {
   467		// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
   468		// Release the lock and acquire it on exit to ensure locks stay paired.
   469		atomic.StoreInt32(&b.hasSub, 1)
   470		benchmarkLock.Unlock()
   471		defer benchmarkLock.Lock()
   472	
   473		benchName, ok := b.name, true
   474		if b.context != nil {
   475			benchName, ok = b.context.match.fullName(&b.common, name)
   476		}
   477		if !ok {
   478			return true
   479		}
   480		sub := &B{
   481			common: common{
   482				signal: make(chan bool),
   483				name:   benchName,
   484				parent: &b.common,
   485				level:  b.level + 1,
   486				w:      b.w,
   487				chatty: b.chatty,
   488			},
   489			benchFunc: f,
   490			benchTime: b.benchTime,
   491			context:   b.context,
   492		}
   493		if sub.run1() {
   494			sub.run()
   495		}
   496		b.add(sub.result)
   497		return !sub.failed
   498	}
   499	
   500	// add simulates running benchmarks in sequence in a single iteration. It is
   501	// used to give some meaningful results in case func Benchmark is used in
   502	// combination with Run.
   503	func (b *B) add(other BenchmarkResult) {
   504		r := &b.result
   505		// The aggregated BenchmarkResults resemble running all subbenchmarks as
   506		// in sequence in a single benchmark.
   507		r.N = 1
   508		r.T += time.Duration(other.NsPerOp())
   509		if other.Bytes == 0 {
   510			// Summing Bytes is meaningless in aggregate if not all subbenchmarks
   511			// set it.
   512			b.missingBytes = true
   513			r.Bytes = 0
   514		}
   515		if !b.missingBytes {
   516			r.Bytes += other.Bytes
   517		}
   518		r.MemAllocs += uint64(other.AllocsPerOp())
   519		r.MemBytes += uint64(other.AllocedBytesPerOp())
   520	}
   521	
   522	// trimOutput shortens the output from a benchmark, which can be very long.
   523	func (b *B) trimOutput() {
   524		// The output is likely to appear multiple times because the benchmark
   525		// is run multiple times, but at least it will be seen. This is not a big deal
   526		// because benchmarks rarely print, but just in case, we trim it if it's too long.
   527		const maxNewlines = 10
   528		for nlCount, j := 0, 0; j < len(b.output); j++ {
   529			if b.output[j] == '\n' {
   530				nlCount++
   531				if nlCount >= maxNewlines {
   532					b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
   533					break
   534				}
   535			}
   536		}
   537	}
   538	
   539	// A PB is used by RunParallel for running parallel benchmarks.
   540	type PB struct {
   541		globalN *uint64 // shared between all worker goroutines iteration counter
   542		grain   uint64  // acquire that many iterations from globalN at once
   543		cache   uint64  // local cache of acquired iterations
   544		bN      uint64  // total number of iterations to execute (b.N)
   545	}
   546	
   547	// Next reports whether there are more iterations to execute.
   548	func (pb *PB) Next() bool {
   549		if pb.cache == 0 {
   550			n := atomic.AddUint64(pb.globalN, pb.grain)
   551			if n <= pb.bN {
   552				pb.cache = pb.grain
   553			} else if n < pb.bN+pb.grain {
   554				pb.cache = pb.bN + pb.grain - n
   555			} else {
   556				return false
   557			}
   558		}
   559		pb.cache--
   560		return true
   561	}
   562	
   563	// RunParallel runs a benchmark in parallel.
   564	// It creates multiple goroutines and distributes b.N iterations among them.
   565	// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
   566	// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
   567	// RunParallel is usually used with the go test -cpu flag.
   568	//
   569	// The body function will be run in each goroutine. It should set up any
   570	// goroutine-local state and then iterate until pb.Next returns false.
   571	// It should not use the StartTimer, StopTimer, or ResetTimer functions,
   572	// because they have global effect. It should also not call Run.
   573	func (b *B) RunParallel(body func(*PB)) {
   574		if b.N == 0 {
   575			return // Nothing to do when probing.
   576		}
   577		// Calculate grain size as number of iterations that take ~100µs.
   578		// 100µs is enough to amortize the overhead and provide sufficient
   579		// dynamic load balancing.
   580		grain := uint64(0)
   581		if b.previousN > 0 && b.previousDuration > 0 {
   582			grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
   583		}
   584		if grain < 1 {
   585			grain = 1
   586		}
   587		// We expect the inner loop and function call to take at least 10ns,
   588		// so do not do more than 100µs/10ns=1e4 iterations.
   589		if grain > 1e4 {
   590			grain = 1e4
   591		}
   592	
   593		n := uint64(0)
   594		numProcs := b.parallelism * runtime.GOMAXPROCS(0)
   595		var wg sync.WaitGroup
   596		wg.Add(numProcs)
   597		for p := 0; p < numProcs; p++ {
   598			go func() {
   599				defer wg.Done()
   600				pb := &PB{
   601					globalN: &n,
   602					grain:   grain,
   603					bN:      uint64(b.N),
   604				}
   605				body(pb)
   606			}()
   607		}
   608		wg.Wait()
   609		if n <= uint64(b.N) && !b.Failed() {
   610			b.Fatal("RunParallel: body exited without pb.Next() == false")
   611		}
   612	}
   613	
   614	// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
   615	// There is usually no need to call SetParallelism for CPU-bound benchmarks.
   616	// If p is less than 1, this call will have no effect.
   617	func (b *B) SetParallelism(p int) {
   618		if p >= 1 {
   619			b.parallelism = p
   620		}
   621	}
   622	
   623	// Benchmark benchmarks a single function. Useful for creating
   624	// custom benchmarks that do not use the "go test" command.
   625	//
   626	// If f calls Run, the result will be an estimate of running all its
   627	// subbenchmarks that don't call Run in sequence in a single benchmark.
   628	func Benchmark(f func(b *B)) BenchmarkResult {
   629		b := &B{
   630			common: common{
   631				signal: make(chan bool),
   632				w:      discard{},
   633			},
   634			benchFunc: f,
   635			benchTime: *benchTime,
   636		}
   637		if !b.run1() {
   638			return BenchmarkResult{}
   639		}
   640		return b.run()
   641	}
   642	
   643	type discard struct{}
   644	
   645	func (discard) Write(b []byte) (n int, err error) { return len(b), nil }
   646	

View as plain text