Source file src/cmd/vendor/github.com/google/pprof/profile/legacy_profile.go

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // This file implements parsers to convert legacy profiles into the
    16  // profile.proto format.
    17  
    18  package profile
    19  
    20  import (
    21  	"bufio"
    22  	"bytes"
    23  	"fmt"
    24  	"io"
    25  	"math"
    26  	"regexp"
    27  	"strconv"
    28  	"strings"
    29  )
    30  
    31  var (
    32  	countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
    33  	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)
    34  
    35  	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
    36  	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
    37  
    38  	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
    39  
    40  	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
    41  
    42  	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)
    43  
    44  	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)
    45  
    46  	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
    47  	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
    48  
    49  	// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
    50  	// Recommended format:
    51  	// Start   End     object file name     offset(optional)   linker build id
    52  	// 0x40000-0x80000 /path/to/binary      (@FF00)            abc123456
    53  	spaceDigits = `\s+[[:digit:]]+`
    54  	hexPair     = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
    55  	oSpace      = `\s*`
    56  	// Capturing expressions.
    57  	cHex           = `(?:0x)?([[:xdigit:]]+)`
    58  	cHexRange      = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
    59  	cSpaceString   = `(?:\s+(\S+))?`
    60  	cSpaceHex      = `(?:\s+([[:xdigit:]]+))?`
    61  	cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
    62  	cPerm          = `(?:\s+([-rwxp]+))?`
    63  
    64  	procMapsRE  = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
    65  	briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)
    66  
    67  	// Regular expression to parse log data, of the form:
    68  	// ... file:line] msg...
    69  	logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
    70  )
    71  
    72  func isSpaceOrComment(line string) bool {
    73  	trimmed := strings.TrimSpace(line)
    74  	return len(trimmed) == 0 || trimmed[0] == '#'
    75  }
    76  
    77  // parseGoCount parses a Go count profile (e.g., threadcreate or
    78  // goroutine) and returns a new Profile.
    79  func parseGoCount(b []byte) (*Profile, error) {
    80  	s := bufio.NewScanner(bytes.NewBuffer(b))
    81  	// Skip comments at the beginning of the file.
    82  	for s.Scan() && isSpaceOrComment(s.Text()) {
    83  	}
    84  	if err := s.Err(); err != nil {
    85  		return nil, err
    86  	}
    87  	m := countStartRE.FindStringSubmatch(s.Text())
    88  	if m == nil {
    89  		return nil, errUnrecognized
    90  	}
    91  	profileType := m[1]
    92  	p := &Profile{
    93  		PeriodType: &ValueType{Type: profileType, Unit: "count"},
    94  		Period:     1,
    95  		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
    96  	}
    97  	locations := make(map[uint64]*Location)
    98  	for s.Scan() {
    99  		line := s.Text()
   100  		if isSpaceOrComment(line) {
   101  			continue
   102  		}
   103  		if strings.HasPrefix(line, "---") {
   104  			break
   105  		}
   106  		m := countRE.FindStringSubmatch(line)
   107  		if m == nil {
   108  			return nil, errMalformed
   109  		}
   110  		n, err := strconv.ParseInt(m[1], 0, 64)
   111  		if err != nil {
   112  			return nil, errMalformed
   113  		}
   114  		fields := strings.Fields(m[2])
   115  		locs := make([]*Location, 0, len(fields))
   116  		for _, stk := range fields {
   117  			addr, err := strconv.ParseUint(stk, 0, 64)
   118  			if err != nil {
   119  				return nil, errMalformed
   120  			}
   121  			// Adjust all frames by -1 to land on top of the call instruction.
   122  			addr--
   123  			loc := locations[addr]
   124  			if loc == nil {
   125  				loc = &Location{
   126  					Address: addr,
   127  				}
   128  				locations[addr] = loc
   129  				p.Location = append(p.Location, loc)
   130  			}
   131  			locs = append(locs, loc)
   132  		}
   133  		p.Sample = append(p.Sample, &Sample{
   134  			Location: locs,
   135  			Value:    []int64{n},
   136  		})
   137  	}
   138  	if err := s.Err(); err != nil {
   139  		return nil, err
   140  	}
   141  
   142  	if err := parseAdditionalSections(s, p); err != nil {
   143  		return nil, err
   144  	}
   145  	return p, nil
   146  }
   147  
   148  // remapLocationIDs ensures there is a location for each address
   149  // referenced by a sample, and remaps the samples to point to the new
   150  // location ids.
   151  func (p *Profile) remapLocationIDs() {
   152  	seen := make(map[*Location]bool, len(p.Location))
   153  	var locs []*Location
   154  
   155  	for _, s := range p.Sample {
   156  		for _, l := range s.Location {
   157  			if seen[l] {
   158  				continue
   159  			}
   160  			l.ID = uint64(len(locs) + 1)
   161  			locs = append(locs, l)
   162  			seen[l] = true
   163  		}
   164  	}
   165  	p.Location = locs
   166  }
   167  
   168  func (p *Profile) remapFunctionIDs() {
   169  	seen := make(map[*Function]bool, len(p.Function))
   170  	var fns []*Function
   171  
   172  	for _, l := range p.Location {
   173  		for _, ln := range l.Line {
   174  			fn := ln.Function
   175  			if fn == nil || seen[fn] {
   176  				continue
   177  			}
   178  			fn.ID = uint64(len(fns) + 1)
   179  			fns = append(fns, fn)
   180  			seen[fn] = true
   181  		}
   182  	}
   183  	p.Function = fns
   184  }
   185  
   186  // remapMappingIDs matches location addresses with existing mappings
   187  // and updates them appropriately. This is O(N*M), if this ever shows
   188  // up as a bottleneck, evaluate sorting the mappings and doing a
   189  // binary search, which would make it O(N*log(M)).
   190  func (p *Profile) remapMappingIDs() {
   191  	// Some profile handlers will incorrectly set regions for the main
   192  	// executable if its section is remapped. Fix them through heuristics.
   193  
   194  	if len(p.Mapping) > 0 {
   195  		// Remove the initial mapping if named '/anon_hugepage' and has a
   196  		// consecutive adjacent mapping.
   197  		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
   198  			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
   199  				p.Mapping = p.Mapping[1:]
   200  			}
   201  		}
   202  	}
   203  
   204  	// Subtract the offset from the start of the main mapping if it
   205  	// ends up at a recognizable start address.
   206  	if len(p.Mapping) > 0 {
   207  		const expectedStart = 0x400000
   208  		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
   209  			m.Start = expectedStart
   210  			m.Offset = 0
   211  		}
   212  	}
   213  
   214  	// Associate each location with an address to the corresponding
   215  	// mapping. Create fake mapping if a suitable one isn't found.
   216  	var fake *Mapping
   217  nextLocation:
   218  	for _, l := range p.Location {
   219  		a := l.Address
   220  		if l.Mapping != nil || a == 0 {
   221  			continue
   222  		}
   223  		for _, m := range p.Mapping {
   224  			if m.Start <= a && a < m.Limit {
   225  				l.Mapping = m
   226  				continue nextLocation
   227  			}
   228  		}
   229  		// Work around legacy handlers failing to encode the first
   230  		// part of mappings split into adjacent ranges.
   231  		for _, m := range p.Mapping {
   232  			if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
   233  				m.Start -= m.Offset
   234  				m.Offset = 0
   235  				l.Mapping = m
   236  				continue nextLocation
   237  			}
   238  		}
   239  		// If there is still no mapping, create a fake one.
   240  		// This is important for the Go legacy handler, which produced
   241  		// no mappings.
   242  		if fake == nil {
   243  			fake = &Mapping{
   244  				ID:    1,
   245  				Limit: ^uint64(0),
   246  			}
   247  			p.Mapping = append(p.Mapping, fake)
   248  		}
   249  		l.Mapping = fake
   250  	}
   251  
   252  	// Reset all mapping IDs.
   253  	for i, m := range p.Mapping {
   254  		m.ID = uint64(i + 1)
   255  	}
   256  }
   257  
   258  var cpuInts = []func([]byte) (uint64, []byte){
   259  	get32l,
   260  	get32b,
   261  	get64l,
   262  	get64b,
   263  }
   264  
   265  func get32l(b []byte) (uint64, []byte) {
   266  	if len(b) < 4 {
   267  		return 0, nil
   268  	}
   269  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
   270  }
   271  
   272  func get32b(b []byte) (uint64, []byte) {
   273  	if len(b) < 4 {
   274  		return 0, nil
   275  	}
   276  	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
   277  }
   278  
   279  func get64l(b []byte) (uint64, []byte) {
   280  	if len(b) < 8 {
   281  		return 0, nil
   282  	}
   283  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
   284  }
   285  
   286  func get64b(b []byte) (uint64, []byte) {
   287  	if len(b) < 8 {
   288  		return 0, nil
   289  	}
   290  	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
   291  }
   292  
   293  // parseCPU parses a profilez legacy profile and returns a newly
   294  // populated Profile.
   295  //
   296  // The general format for profilez samples is a sequence of words in
   297  // binary format. The first words are a header with the following data:
   298  //
   299  //	1st word -- 0
   300  //	2nd word -- 3
   301  //	3rd word -- 0 if a c++ application, 1 if a java application.
   302  //	4th word -- Sampling period (in microseconds).
   303  //	5th word -- Padding.
   304  func parseCPU(b []byte) (*Profile, error) {
   305  	var parse func([]byte) (uint64, []byte)
   306  	var n1, n2, n3, n4, n5 uint64
   307  	for _, parse = range cpuInts {
   308  		var tmp []byte
   309  		n1, tmp = parse(b)
   310  		n2, tmp = parse(tmp)
   311  		n3, tmp = parse(tmp)
   312  		n4, tmp = parse(tmp)
   313  		n5, tmp = parse(tmp)
   314  
   315  		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
   316  			b = tmp
   317  			return cpuProfile(b, int64(n4), parse)
   318  		}
   319  		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
   320  			b = tmp
   321  			return javaCPUProfile(b, int64(n4), parse)
   322  		}
   323  	}
   324  	return nil, errUnrecognized
   325  }
   326  
   327  // cpuProfile returns a new Profile from C++ profilez data.
   328  // b is the profile bytes after the header, period is the profiling
   329  // period, and parse is a function to parse 8-byte chunks from the
   330  // profile in its native endianness.
   331  func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
   332  	p := &Profile{
   333  		Period:     period * 1000,
   334  		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
   335  		SampleType: []*ValueType{
   336  			{Type: "samples", Unit: "count"},
   337  			{Type: "cpu", Unit: "nanoseconds"},
   338  		},
   339  	}
   340  	var err error
   341  	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
   342  		return nil, err
   343  	}
   344  
   345  	// If *most* samples have the same second-to-the-bottom frame, it
   346  	// strongly suggests that it is an uninteresting artifact of
   347  	// measurement -- a stack frame pushed by the signal handler. The
   348  	// bottom frame is always correct as it is picked up from the signal
   349  	// structure, not the stack. Check if this is the case and if so,
   350  	// remove.
   351  
   352  	// Remove up to two frames.
   353  	maxiter := 2
   354  	// Allow one different sample for this many samples with the same
   355  	// second-to-last frame.
   356  	similarSamples := 32
   357  	margin := len(p.Sample) / similarSamples
   358  
   359  	for iter := 0; iter < maxiter; iter++ {
   360  		addr1 := make(map[uint64]int)
   361  		for _, s := range p.Sample {
   362  			if len(s.Location) > 1 {
   363  				a := s.Location[1].Address
   364  				addr1[a] = addr1[a] + 1
   365  			}
   366  		}
   367  
   368  		for id1, count := range addr1 {
   369  			if count >= len(p.Sample)-margin {
   370  				// Found uninteresting frame, strip it out from all samples
   371  				for _, s := range p.Sample {
   372  					if len(s.Location) > 1 && s.Location[1].Address == id1 {
   373  						s.Location = append(s.Location[:1], s.Location[2:]...)
   374  					}
   375  				}
   376  				break
   377  			}
   378  		}
   379  	}
   380  
   381  	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
   382  		return nil, err
   383  	}
   384  
   385  	cleanupDuplicateLocations(p)
   386  	return p, nil
   387  }
   388  
   389  func cleanupDuplicateLocations(p *Profile) {
   390  	// The profile handler may duplicate the leaf frame, because it gets
   391  	// its address both from stack unwinding and from the signal
   392  	// context. Detect this and delete the duplicate, which has been
   393  	// adjusted by -1. The leaf address should not be adjusted as it is
   394  	// not a call.
   395  	for _, s := range p.Sample {
   396  		if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
   397  			s.Location = append(s.Location[:1], s.Location[2:]...)
   398  		}
   399  	}
   400  }
   401  
   402  // parseCPUSamples parses a collection of profilez samples from a
   403  // profile.
   404  //
   405  // profilez samples are a repeated sequence of stack frames of the
   406  // form:
   407  //
   408  //	1st word -- The number of times this stack was encountered.
   409  //	2nd word -- The size of the stack (StackSize).
   410  //	3rd word -- The first address on the stack.
   411  //	...
   412  //	StackSize + 2 -- The last address on the stack
   413  //
   414  // The last stack trace is of the form:
   415  //
   416  //	1st word -- 0
   417  //	2nd word -- 1
   418  //	3rd word -- 0
   419  //
   420  // Addresses from stack traces may point to the next instruction after
   421  // each call. Optionally adjust by -1 to land somewhere on the actual
   422  // call (except for the leaf, which is not a call).
   423  func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
   424  	locs := make(map[uint64]*Location)
   425  	for len(b) > 0 {
   426  		var count, nstk uint64
   427  		count, b = parse(b)
   428  		nstk, b = parse(b)
   429  		if b == nil || nstk > uint64(len(b)/4) {
   430  			return nil, nil, errUnrecognized
   431  		}
   432  		var sloc []*Location
   433  		addrs := make([]uint64, nstk)
   434  		for i := 0; i < int(nstk); i++ {
   435  			addrs[i], b = parse(b)
   436  		}
   437  
   438  		if count == 0 && nstk == 1 && addrs[0] == 0 {
   439  			// End of data marker
   440  			break
   441  		}
   442  		for i, addr := range addrs {
   443  			if adjust && i > 0 {
   444  				addr--
   445  			}
   446  			loc := locs[addr]
   447  			if loc == nil {
   448  				loc = &Location{
   449  					Address: addr,
   450  				}
   451  				locs[addr] = loc
   452  				p.Location = append(p.Location, loc)
   453  			}
   454  			sloc = append(sloc, loc)
   455  		}
   456  		p.Sample = append(p.Sample,
   457  			&Sample{
   458  				Value:    []int64{int64(count), int64(count) * p.Period},
   459  				Location: sloc,
   460  			})
   461  	}
   462  	// Reached the end without finding the EOD marker.
   463  	return b, locs, nil
   464  }
   465  
   466  // parseHeap parses a heapz legacy or a growthz profile and
   467  // returns a newly populated Profile.
   468  func parseHeap(b []byte) (p *Profile, err error) {
   469  	s := bufio.NewScanner(bytes.NewBuffer(b))
   470  	if !s.Scan() {
   471  		if err := s.Err(); err != nil {
   472  			return nil, err
   473  		}
   474  		return nil, errUnrecognized
   475  	}
   476  	p = &Profile{}
   477  
   478  	sampling := ""
   479  	hasAlloc := false
   480  
   481  	line := s.Text()
   482  	p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
   483  	if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
   484  		sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
   485  		if err != nil {
   486  			return nil, err
   487  		}
   488  	} else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
   489  		p.Period = 1
   490  	} else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
   491  		p.Period = 1
   492  	} else {
   493  		return nil, errUnrecognized
   494  	}
   495  
   496  	if hasAlloc {
   497  		// Put alloc before inuse so that default pprof selection
   498  		// will prefer inuse_space.
   499  		p.SampleType = []*ValueType{
   500  			{Type: "alloc_objects", Unit: "count"},
   501  			{Type: "alloc_space", Unit: "bytes"},
   502  			{Type: "inuse_objects", Unit: "count"},
   503  			{Type: "inuse_space", Unit: "bytes"},
   504  		}
   505  	} else {
   506  		p.SampleType = []*ValueType{
   507  			{Type: "objects", Unit: "count"},
   508  			{Type: "space", Unit: "bytes"},
   509  		}
   510  	}
   511  
   512  	locs := make(map[uint64]*Location)
   513  	for s.Scan() {
   514  		line := strings.TrimSpace(s.Text())
   515  
   516  		if isSpaceOrComment(line) {
   517  			continue
   518  		}
   519  
   520  		if isMemoryMapSentinel(line) {
   521  			break
   522  		}
   523  
   524  		value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
   525  		if err != nil {
   526  			return nil, err
   527  		}
   528  
   529  		var sloc []*Location
   530  		for _, addr := range addrs {
   531  			// Addresses from stack traces point to the next instruction after
   532  			// each call. Adjust by -1 to land somewhere on the actual call.
   533  			addr--
   534  			loc := locs[addr]
   535  			if locs[addr] == nil {
   536  				loc = &Location{
   537  					Address: addr,
   538  				}
   539  				p.Location = append(p.Location, loc)
   540  				locs[addr] = loc
   541  			}
   542  			sloc = append(sloc, loc)
   543  		}
   544  
   545  		p.Sample = append(p.Sample, &Sample{
   546  			Value:    value,
   547  			Location: sloc,
   548  			NumLabel: map[string][]int64{"bytes": {blocksize}},
   549  		})
   550  	}
   551  	if err := s.Err(); err != nil {
   552  		return nil, err
   553  	}
   554  	if err := parseAdditionalSections(s, p); err != nil {
   555  		return nil, err
   556  	}
   557  	return p, nil
   558  }
   559  
   560  func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
   561  	header := heapHeaderRE.FindStringSubmatch(line)
   562  	if header == nil {
   563  		return "", 0, false, errUnrecognized
   564  	}
   565  
   566  	if len(header[6]) > 0 {
   567  		if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
   568  			return "", 0, false, errUnrecognized
   569  		}
   570  	}
   571  
   572  	if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
   573  		hasAlloc = true
   574  	}
   575  
   576  	switch header[5] {
   577  	case "heapz_v2", "heap_v2":
   578  		return "v2", period, hasAlloc, nil
   579  	case "heapprofile":
   580  		return "", 1, hasAlloc, nil
   581  	case "heap":
   582  		return "v2", period / 2, hasAlloc, nil
   583  	default:
   584  		return "", 0, false, errUnrecognized
   585  	}
   586  }
   587  
   588  // parseHeapSample parses a single row from a heap profile into a new Sample.
   589  func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
   590  	sampleData := heapSampleRE.FindStringSubmatch(line)
   591  	if len(sampleData) != 6 {
   592  		return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
   593  	}
   594  
   595  	// This is a local-scoped helper function to avoid needing to pass
   596  	// around rate, sampling and many return parameters.
   597  	addValues := func(countString, sizeString string, label string) error {
   598  		count, err := strconv.ParseInt(countString, 10, 64)
   599  		if err != nil {
   600  			return fmt.Errorf("malformed sample: %s: %v", line, err)
   601  		}
   602  		size, err := strconv.ParseInt(sizeString, 10, 64)
   603  		if err != nil {
   604  			return fmt.Errorf("malformed sample: %s: %v", line, err)
   605  		}
   606  		if count == 0 && size != 0 {
   607  			return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
   608  		}
   609  		if count != 0 {
   610  			blocksize = size / count
   611  			if sampling == "v2" {
   612  				count, size = scaleHeapSample(count, size, rate)
   613  			}
   614  		}
   615  		value = append(value, count, size)
   616  		return nil
   617  	}
   618  
   619  	if includeAlloc {
   620  		if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
   621  			return nil, 0, nil, err
   622  		}
   623  	}
   624  
   625  	if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
   626  		return nil, 0, nil, err
   627  	}
   628  
   629  	addrs, err = parseHexAddresses(sampleData[5])
   630  	if err != nil {
   631  		return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   632  	}
   633  
   634  	return value, blocksize, addrs, nil
   635  }
   636  
   637  // parseHexAddresses extracts hex numbers from a string, attempts to convert
   638  // each to an unsigned 64-bit number and returns the resulting numbers as a
   639  // slice, or an error if the string contains hex numbers which are too large to
   640  // handle (which means a malformed profile).
   641  func parseHexAddresses(s string) ([]uint64, error) {
   642  	hexStrings := hexNumberRE.FindAllString(s, -1)
   643  	var addrs []uint64
   644  	for _, s := range hexStrings {
   645  		if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
   646  			addrs = append(addrs, addr)
   647  		} else {
   648  			return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
   649  		}
   650  	}
   651  	return addrs, nil
   652  }
   653  
   654  // scaleHeapSample adjusts the data from a heapz Sample to
   655  // account for its probability of appearing in the collected
   656  // data. heapz profiles are a sampling of the memory allocations
   657  // requests in a program. We estimate the unsampled value by dividing
   658  // each collected sample by its probability of appearing in the
   659  // profile. heapz v2 profiles rely on a poisson process to determine
   660  // which samples to collect, based on the desired average collection
   661  // rate R. The probability of a sample of size S to appear in that
   662  // profile is 1-exp(-S/R).
   663  func scaleHeapSample(count, size, rate int64) (int64, int64) {
   664  	if count == 0 || size == 0 {
   665  		return 0, 0
   666  	}
   667  
   668  	if rate <= 1 {
   669  		// if rate==1 all samples were collected so no adjustment is needed.
   670  		// if rate<1 treat as unknown and skip scaling.
   671  		return count, size
   672  	}
   673  
   674  	avgSize := float64(size) / float64(count)
   675  	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
   676  
   677  	return int64(float64(count) * scale), int64(float64(size) * scale)
   678  }
   679  
   680  // parseContention parses a mutex or contention profile. There are 2 cases:
   681  // "--- contentionz " for legacy C++ profiles (and backwards compatibility)
   682  // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
   683  func parseContention(b []byte) (*Profile, error) {
   684  	s := bufio.NewScanner(bytes.NewBuffer(b))
   685  	if !s.Scan() {
   686  		if err := s.Err(); err != nil {
   687  			return nil, err
   688  		}
   689  		return nil, errUnrecognized
   690  	}
   691  
   692  	switch l := s.Text(); {
   693  	case strings.HasPrefix(l, "--- contentionz "):
   694  	case strings.HasPrefix(l, "--- mutex:"):
   695  	case strings.HasPrefix(l, "--- contention:"):
   696  	default:
   697  		return nil, errUnrecognized
   698  	}
   699  
   700  	p := &Profile{
   701  		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
   702  		Period:     1,
   703  		SampleType: []*ValueType{
   704  			{Type: "contentions", Unit: "count"},
   705  			{Type: "delay", Unit: "nanoseconds"},
   706  		},
   707  	}
   708  
   709  	var cpuHz int64
   710  	// Parse text of the form "attribute = value" before the samples.
   711  	const delimiter = "="
   712  	for s.Scan() {
   713  		line := s.Text()
   714  		if line = strings.TrimSpace(line); isSpaceOrComment(line) {
   715  			continue
   716  		}
   717  		if strings.HasPrefix(line, "---") {
   718  			break
   719  		}
   720  		attr := strings.SplitN(line, delimiter, 2)
   721  		if len(attr) != 2 {
   722  			break
   723  		}
   724  		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
   725  		var err error
   726  		switch key {
   727  		case "cycles/second":
   728  			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
   729  				return nil, errUnrecognized
   730  			}
   731  		case "sampling period":
   732  			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
   733  				return nil, errUnrecognized
   734  			}
   735  		case "ms since reset":
   736  			ms, err := strconv.ParseInt(val, 0, 64)
   737  			if err != nil {
   738  				return nil, errUnrecognized
   739  			}
   740  			p.DurationNanos = ms * 1000 * 1000
   741  		case "format":
   742  			// CPP contentionz profiles don't have format.
   743  			return nil, errUnrecognized
   744  		case "resolution":
   745  			// CPP contentionz profiles don't have resolution.
   746  			return nil, errUnrecognized
   747  		case "discarded samples":
   748  		default:
   749  			return nil, errUnrecognized
   750  		}
   751  	}
   752  	if err := s.Err(); err != nil {
   753  		return nil, err
   754  	}
   755  
   756  	locs := make(map[uint64]*Location)
   757  	for {
   758  		line := strings.TrimSpace(s.Text())
   759  		if strings.HasPrefix(line, "---") {
   760  			break
   761  		}
   762  		if !isSpaceOrComment(line) {
   763  			value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
   764  			if err != nil {
   765  				return nil, err
   766  			}
   767  			var sloc []*Location
   768  			for _, addr := range addrs {
   769  				// Addresses from stack traces point to the next instruction after
   770  				// each call. Adjust by -1 to land somewhere on the actual call.
   771  				addr--
   772  				loc := locs[addr]
   773  				if locs[addr] == nil {
   774  					loc = &Location{
   775  						Address: addr,
   776  					}
   777  					p.Location = append(p.Location, loc)
   778  					locs[addr] = loc
   779  				}
   780  				sloc = append(sloc, loc)
   781  			}
   782  			p.Sample = append(p.Sample, &Sample{
   783  				Value:    value,
   784  				Location: sloc,
   785  			})
   786  		}
   787  		if !s.Scan() {
   788  			break
   789  		}
   790  	}
   791  	if err := s.Err(); err != nil {
   792  		return nil, err
   793  	}
   794  
   795  	if err := parseAdditionalSections(s, p); err != nil {
   796  		return nil, err
   797  	}
   798  
   799  	return p, nil
   800  }
   801  
   802  // parseContentionSample parses a single row from a contention profile
   803  // into a new Sample.
   804  func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
   805  	sampleData := contentionSampleRE.FindStringSubmatch(line)
   806  	if sampleData == nil {
   807  		return nil, nil, errUnrecognized
   808  	}
   809  
   810  	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
   811  	if err != nil {
   812  		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   813  	}
   814  	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
   815  	if err != nil {
   816  		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   817  	}
   818  
   819  	// Unsample values if period and cpuHz are available.
   820  	// - Delays are scaled to cycles and then to nanoseconds.
   821  	// - Contentions are scaled to cycles.
   822  	if period > 0 {
   823  		if cpuHz > 0 {
   824  			cpuGHz := float64(cpuHz) / 1e9
   825  			v1 = int64(float64(v1) * float64(period) / cpuGHz)
   826  		}
   827  		v2 = v2 * period
   828  	}
   829  
   830  	value = []int64{v2, v1}
   831  	addrs, err = parseHexAddresses(sampleData[3])
   832  	if err != nil {
   833  		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   834  	}
   835  
   836  	return value, addrs, nil
   837  }
   838  
   839  // parseThread parses a Threadz profile and returns a new Profile.
   840  func parseThread(b []byte) (*Profile, error) {
   841  	s := bufio.NewScanner(bytes.NewBuffer(b))
   842  	// Skip past comments and empty lines seeking a real header.
   843  	for s.Scan() && isSpaceOrComment(s.Text()) {
   844  	}
   845  
   846  	line := s.Text()
   847  	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
   848  		// Advance over initial comments until first stack trace.
   849  		for s.Scan() {
   850  			if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
   851  				break
   852  			}
   853  		}
   854  	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   855  		return nil, errUnrecognized
   856  	}
   857  
   858  	p := &Profile{
   859  		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
   860  		PeriodType: &ValueType{Type: "thread", Unit: "count"},
   861  		Period:     1,
   862  	}
   863  
   864  	locs := make(map[uint64]*Location)
   865  	// Recognize each thread and populate profile samples.
   866  	for !isMemoryMapSentinel(line) {
   867  		if strings.HasPrefix(line, "---- no stack trace for") {
   868  			break
   869  		}
   870  		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
   871  			return nil, errUnrecognized
   872  		}
   873  
   874  		var addrs []uint64
   875  		var err error
   876  		line, addrs, err = parseThreadSample(s)
   877  		if err != nil {
   878  			return nil, err
   879  		}
   880  		if len(addrs) == 0 {
   881  			// We got a --same as previous threads--. Bump counters.
   882  			if len(p.Sample) > 0 {
   883  				s := p.Sample[len(p.Sample)-1]
   884  				s.Value[0]++
   885  			}
   886  			continue
   887  		}
   888  
   889  		var sloc []*Location
   890  		for i, addr := range addrs {
   891  			// Addresses from stack traces point to the next instruction after
   892  			// each call. Adjust by -1 to land somewhere on the actual call
   893  			// (except for the leaf, which is not a call).
   894  			if i > 0 {
   895  				addr--
   896  			}
   897  			loc := locs[addr]
   898  			if locs[addr] == nil {
   899  				loc = &Location{
   900  					Address: addr,
   901  				}
   902  				p.Location = append(p.Location, loc)
   903  				locs[addr] = loc
   904  			}
   905  			sloc = append(sloc, loc)
   906  		}
   907  
   908  		p.Sample = append(p.Sample, &Sample{
   909  			Value:    []int64{1},
   910  			Location: sloc,
   911  		})
   912  	}
   913  
   914  	if err := parseAdditionalSections(s, p); err != nil {
   915  		return nil, err
   916  	}
   917  
   918  	cleanupDuplicateLocations(p)
   919  	return p, nil
   920  }
   921  
   922  // parseThreadSample parses a symbolized or unsymbolized stack trace.
   923  // Returns the first line after the traceback, the sample (or nil if
   924  // it hits a 'same-as-previous' marker) and an error.
   925  func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
   926  	var line string
   927  	sameAsPrevious := false
   928  	for s.Scan() {
   929  		line = strings.TrimSpace(s.Text())
   930  		if line == "" {
   931  			continue
   932  		}
   933  
   934  		if strings.HasPrefix(line, "---") {
   935  			break
   936  		}
   937  		if strings.Contains(line, "same as previous thread") {
   938  			sameAsPrevious = true
   939  			continue
   940  		}
   941  
   942  		curAddrs, err := parseHexAddresses(line)
   943  		if err != nil {
   944  			return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
   945  		}
   946  		addrs = append(addrs, curAddrs...)
   947  	}
   948  	if err := s.Err(); err != nil {
   949  		return "", nil, err
   950  	}
   951  	if sameAsPrevious {
   952  		return line, nil, nil
   953  	}
   954  	return line, addrs, nil
   955  }
   956  
   957  // parseAdditionalSections parses any additional sections in the
   958  // profile, ignoring any unrecognized sections.
   959  func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
   960  	for !isMemoryMapSentinel(s.Text()) && s.Scan() {
   961  	}
   962  	if err := s.Err(); err != nil {
   963  		return err
   964  	}
   965  	return p.ParseMemoryMapFromScanner(s)
   966  }
   967  
   968  // ParseProcMaps parses a memory map in the format of /proc/self/maps.
   969  // ParseMemoryMap should be called after setting on a profile to
   970  // associate locations to the corresponding mapping based on their
   971  // address.
   972  func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
   973  	s := bufio.NewScanner(rd)
   974  	return parseProcMapsFromScanner(s)
   975  }
   976  
   977  func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
   978  	var mapping []*Mapping
   979  
   980  	var attrs []string
   981  	const delimiter = "="
   982  	r := strings.NewReplacer()
   983  	for s.Scan() {
   984  		line := r.Replace(removeLoggingInfo(s.Text()))
   985  		m, err := parseMappingEntry(line)
   986  		if err != nil {
   987  			if err == errUnrecognized {
   988  				// Recognize assignments of the form: attr=value, and replace
   989  				// $attr with value on subsequent mappings.
   990  				if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
   991  					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
   992  					r = strings.NewReplacer(attrs...)
   993  				}
   994  				// Ignore any unrecognized entries
   995  				continue
   996  			}
   997  			return nil, err
   998  		}
   999  		if m == nil {
  1000  			continue
  1001  		}
  1002  		mapping = append(mapping, m)
  1003  	}
  1004  	if err := s.Err(); err != nil {
  1005  		return nil, err
  1006  	}
  1007  	return mapping, nil
  1008  }
  1009  
  1010  // removeLoggingInfo detects and removes log prefix entries generated
  1011  // by the glog package. If no logging prefix is detected, the string
  1012  // is returned unmodified.
  1013  func removeLoggingInfo(line string) string {
  1014  	if match := logInfoRE.FindStringIndex(line); match != nil {
  1015  		return line[match[1]:]
  1016  	}
  1017  	return line
  1018  }
  1019  
  1020  // ParseMemoryMap parses a memory map in the format of
  1021  // /proc/self/maps, and overrides the mappings in the current profile.
  1022  // It renumbers the samples and locations in the profile correspondingly.
  1023  func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  1024  	return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
  1025  }
  1026  
  1027  // ParseMemoryMapFromScanner parses a memory map in the format of
  1028  // /proc/self/maps or a variety of legacy format, and overrides the
  1029  // mappings in the current profile.  It renumbers the samples and
  1030  // locations in the profile correspondingly.
  1031  func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
  1032  	mapping, err := parseProcMapsFromScanner(s)
  1033  	if err != nil {
  1034  		return err
  1035  	}
  1036  	p.Mapping = append(p.Mapping, mapping...)
  1037  	p.massageMappings()
  1038  	p.remapLocationIDs()
  1039  	p.remapFunctionIDs()
  1040  	p.remapMappingIDs()
  1041  	return nil
  1042  }
  1043  
  1044  func parseMappingEntry(l string) (*Mapping, error) {
  1045  	var start, end, perm, file, offset, buildID string
  1046  	if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
  1047  		start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
  1048  	} else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
  1049  		start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
  1050  	} else {
  1051  		return nil, errUnrecognized
  1052  	}
  1053  
  1054  	var err error
  1055  	mapping := &Mapping{
  1056  		File:    file,
  1057  		BuildID: buildID,
  1058  	}
  1059  	if perm != "" && !strings.Contains(perm, "x") {
  1060  		// Skip non-executable entries.
  1061  		return nil, nil
  1062  	}
  1063  	if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
  1064  		return nil, errUnrecognized
  1065  	}
  1066  	if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
  1067  		return nil, errUnrecognized
  1068  	}
  1069  	if offset != "" {
  1070  		if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
  1071  			return nil, errUnrecognized
  1072  		}
  1073  	}
  1074  	return mapping, nil
  1075  }
  1076  
  1077  var memoryMapSentinels = []string{
  1078  	"--- Memory map: ---",
  1079  	"MAPPED_LIBRARIES:",
  1080  }
  1081  
  1082  // isMemoryMapSentinel returns true if the string contains one of the
  1083  // known sentinels for memory map information.
  1084  func isMemoryMapSentinel(line string) bool {
  1085  	for _, s := range memoryMapSentinels {
  1086  		if strings.Contains(line, s) {
  1087  			return true
  1088  		}
  1089  	}
  1090  	return false
  1091  }
  1092  
  1093  func (p *Profile) addLegacyFrameInfo() {
  1094  	switch {
  1095  	case isProfileType(p, heapzSampleTypes):
  1096  		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  1097  	case isProfileType(p, contentionzSampleTypes):
  1098  		p.DropFrames, p.KeepFrames = lockRxStr, ""
  1099  	default:
  1100  		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  1101  	}
  1102  }
  1103  
  1104  var heapzSampleTypes = [][]string{
  1105  	{"allocations", "size"}, // early Go pprof profiles
  1106  	{"objects", "space"},
  1107  	{"inuse_objects", "inuse_space"},
  1108  	{"alloc_objects", "alloc_space"},
  1109  	{"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"}, // Go pprof legacy profiles
  1110  }
  1111  var contentionzSampleTypes = [][]string{
  1112  	{"contentions", "delay"},
  1113  }
  1114  
  1115  func isProfileType(p *Profile, types [][]string) bool {
  1116  	st := p.SampleType
  1117  nextType:
  1118  	for _, t := range types {
  1119  		if len(st) != len(t) {
  1120  			continue
  1121  		}
  1122  
  1123  		for i := range st {
  1124  			if st[i].Type != t[i] {
  1125  				continue nextType
  1126  			}
  1127  		}
  1128  		return true
  1129  	}
  1130  	return false
  1131  }
  1132  
  1133  var allocRxStr = strings.Join([]string{
  1134  	// POSIX entry points.
  1135  	`calloc`,
  1136  	`cfree`,
  1137  	`malloc`,
  1138  	`free`,
  1139  	`memalign`,
  1140  	`do_memalign`,
  1141  	`(__)?posix_memalign`,
  1142  	`pvalloc`,
  1143  	`valloc`,
  1144  	`realloc`,
  1145  
  1146  	// TC malloc.
  1147  	`tcmalloc::.*`,
  1148  	`tc_calloc`,
  1149  	`tc_cfree`,
  1150  	`tc_malloc`,
  1151  	`tc_free`,
  1152  	`tc_memalign`,
  1153  	`tc_posix_memalign`,
  1154  	`tc_pvalloc`,
  1155  	`tc_valloc`,
  1156  	`tc_realloc`,
  1157  	`tc_new`,
  1158  	`tc_delete`,
  1159  	`tc_newarray`,
  1160  	`tc_deletearray`,
  1161  	`tc_new_nothrow`,
  1162  	`tc_newarray_nothrow`,
  1163  
  1164  	// Memory-allocation routines on OS X.
  1165  	`malloc_zone_malloc`,
  1166  	`malloc_zone_calloc`,
  1167  	`malloc_zone_valloc`,
  1168  	`malloc_zone_realloc`,
  1169  	`malloc_zone_memalign`,
  1170  	`malloc_zone_free`,
  1171  
  1172  	// Go runtime
  1173  	`runtime\..*`,
  1174  
  1175  	// Other misc. memory allocation routines
  1176  	`BaseArena::.*`,
  1177  	`(::)?do_malloc_no_errno`,
  1178  	`(::)?do_malloc_pages`,
  1179  	`(::)?do_malloc`,
  1180  	`DoSampledAllocation`,
  1181  	`MallocedMemBlock::MallocedMemBlock`,
  1182  	`_M_allocate`,
  1183  	`__builtin_(vec_)?delete`,
  1184  	`__builtin_(vec_)?new`,
  1185  	`__gnu_cxx::new_allocator::allocate`,
  1186  	`__libc_malloc`,
  1187  	`__malloc_alloc_template::allocate`,
  1188  	`allocate`,
  1189  	`cpp_alloc`,
  1190  	`operator new(\[\])?`,
  1191  	`simple_alloc::allocate`,
  1192  }, `|`)
  1193  
  1194  var allocSkipRxStr = strings.Join([]string{
  1195  	// Preserve Go runtime frames that appear in the middle/bottom of
  1196  	// the stack.
  1197  	`runtime\.panic`,
  1198  	`runtime\.reflectcall`,
  1199  	`runtime\.call[0-9]*`,
  1200  }, `|`)
  1201  
  1202  var cpuProfilerRxStr = strings.Join([]string{
  1203  	`ProfileData::Add`,
  1204  	`ProfileData::prof_handler`,
  1205  	`CpuProfiler::prof_handler`,
  1206  	`__pthread_sighandler`,
  1207  	`__restore`,
  1208  }, `|`)
  1209  
  1210  var lockRxStr = strings.Join([]string{
  1211  	`RecordLockProfileData`,
  1212  	`(base::)?RecordLockProfileData.*`,
  1213  	`(base::)?SubmitMutexProfileData.*`,
  1214  	`(base::)?SubmitSpinLockProfileData.*`,
  1215  	`(base::Mutex::)?AwaitCommon.*`,
  1216  	`(base::Mutex::)?Unlock.*`,
  1217  	`(base::Mutex::)?UnlockSlow.*`,
  1218  	`(base::Mutex::)?ReaderUnlock.*`,
  1219  	`(base::MutexLock::)?~MutexLock.*`,
  1220  	`(Mutex::)?AwaitCommon.*`,
  1221  	`(Mutex::)?Unlock.*`,
  1222  	`(Mutex::)?UnlockSlow.*`,
  1223  	`(Mutex::)?ReaderUnlock.*`,
  1224  	`(MutexLock::)?~MutexLock.*`,
  1225  	`(SpinLock::)?Unlock.*`,
  1226  	`(SpinLock::)?SlowUnlock.*`,
  1227  	`(SpinLockHolder::)?~SpinLockHolder.*`,
  1228  }, `|`)
  1229  

View as plain text