Source file src/internal/profile/profile.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package profile provides a representation of
     6  // github.com/google/pprof/proto/profile.proto and
     7  // methods to encode/decode/merge profiles in this format.
     8  package profile
     9  
import (
	"bytes"
	"compress/gzip"
	"fmt"
	"internal/lazyregexp"
	"io"
	"sort"
	"strings"
	"time"
)
    19  
// Profile is an in-memory representation of profile.proto.
//
// CheckValid requires every Sample to carry exactly one value per
// SampleType entry, and every Mapping, Location and Function to have a
// unique, non-zero ID within its table.
type Profile struct {
	SampleType        []*ValueType
	DefaultSampleType string
	Sample            []*Sample
	Mapping           []*Mapping
	Location          []*Location
	Function          []*Function
	Comments          []string

	DropFrames string
	KeepFrames string

	// TimeNanos and DurationNanos are printed by String through
	// time.Unix(0, TimeNanos) and time.Duration(DurationNanos).
	TimeNanos     int64
	DurationNanos int64
	PeriodType    *ValueType
	Period        int64

	// NOTE(review): the unexported fields below are not used in this
	// file; presumably they hold string-table indices for the
	// encode/decode code — confirm there.
	commentX           []int64
	dropFramesX        int64
	keepFramesX        int64
	stringTable        []string
	defaultSampleTypeX int64
}
    44  
// ValueType corresponds to Profile.ValueType.
// It pairs the kind of a measurement with the unit it is expressed in,
// and is used for Profile.SampleType and Profile.PeriodType.
type ValueType struct {
	Type string // cpu, wall, inuse_space, etc
	Unit string // seconds, nanoseconds, bytes, etc

	// NOTE(review): presumably string-table indices for Type and Unit
	// used by the encode/decode code — confirm there.
	typeX int64
	unitX int64
}
    53  
// Sample corresponds to Profile.Sample.
//
// Value must hold exactly one entry per Profile.SampleType (enforced by
// Profile.CheckValid). Label and NumLabel map a label key to its string
// or numeric values.
type Sample struct {
	Location []*Location
	Value    []int64
	Label    map[string][]string
	NumLabel map[string][]int64
	NumUnit  map[string][]string

	// NOTE(review): not used in this file; presumably the encoded form
	// of Location and the labels used during decode — confirm there.
	locationIDX []uint64
	labelX      []Label
}
    65  
// Label corresponds to Profile.Label.
// NOTE(review): all fields are unexported and unused in this file;
// keyX and strX are presumably string-table indices — confirm against
// the encode/decode code.
type Label struct {
	keyX int64
	// Exactly one of the two following values must be set
	strX int64
	numX int64 // Integer value for this label
}
    73  
// Mapping corresponds to Profile.Mapping.
//
// ID must be non-zero and unique within a profile (see
// Profile.CheckValid). The Has* flags record which kinds of symbol
// information are available for the mapping; Profile.Aggregate clears
// the ones that are not requested.
type Mapping struct {
	ID              uint64
	Start           uint64
	Limit           uint64
	Offset          uint64
	File            string
	BuildID         string
	HasFunctions    bool
	HasFilenames    bool
	HasLineNumbers  bool
	HasInlineFrames bool

	// NOTE(review): presumably string-table indices for File and
	// BuildID used by the encode/decode code — confirm there.
	fileX    int64
	buildIDX int64
}
    90  
// Location corresponds to Profile.Location.
//
// ID must be non-zero and unique within a profile. Mapping, when set,
// must be the entry of Profile.Mapping registered under its ID (see
// Profile.CheckValid). When Line has several entries, Profile.Aggregate
// keeps only the last one if inline frames are not requested.
type Location struct {
	ID       uint64
	Mapping  *Mapping
	Address  uint64
	Line     []Line
	IsFolded bool

	// NOTE(review): presumably the encoded ID of Mapping, resolved
	// during decode — confirm against the decode code.
	mappingIDX uint64
}
   101  
// Line corresponds to Profile.Line.
// It ties a Location to a Function and a line number. Function, when
// set, must be the entry of Profile.Function registered under its ID
// (see Profile.CheckValid).
type Line struct {
	Function *Function
	Line     int64

	// NOTE(review): presumably the encoded ID of Function, resolved
	// during decode — confirm against the decode code.
	functionIDX uint64
}
   109  
// Function corresponds to Profile.Function.
//
// ID must be non-zero and unique within a profile (see
// Profile.CheckValid). Profile.Demangle looks functions up by
// SystemName and stores the demangled result in Name.
type Function struct {
	ID         uint64
	Name       string
	SystemName string
	Filename   string
	StartLine  int64

	// NOTE(review): presumably string-table indices for Name,
	// SystemName and Filename used by the encode/decode code — confirm.
	nameX       int64
	systemNameX int64
	filenameX   int64
}
   122  
   123  // Parse parses a profile and checks for its validity. The input
   124  // may be a gzip-compressed encoded protobuf or one of many legacy
   125  // profile formats which may be unsupported in the future.
   126  func Parse(r io.Reader) (*Profile, error) {
   127  	orig, err := io.ReadAll(r)
   128  	if err != nil {
   129  		return nil, err
   130  	}
   131  
   132  	var p *Profile
   133  	if len(orig) >= 2 && orig[0] == 0x1f && orig[1] == 0x8b {
   134  		gz, err := gzip.NewReader(bytes.NewBuffer(orig))
   135  		if err != nil {
   136  			return nil, fmt.Errorf("decompressing profile: %v", err)
   137  		}
   138  		data, err := io.ReadAll(gz)
   139  		if err != nil {
   140  			return nil, fmt.Errorf("decompressing profile: %v", err)
   141  		}
   142  		orig = data
   143  	}
   144  
   145  	var lErr error
   146  	p, pErr := parseUncompressed(orig)
   147  	if pErr != nil {
   148  		p, lErr = parseLegacy(orig)
   149  	}
   150  	if pErr != nil && lErr != nil {
   151  		return nil, fmt.Errorf("parsing profile: not a valid proto profile (%w) or legacy profile (%w)", pErr, lErr)
   152  	}
   153  
   154  	if err := p.CheckValid(); err != nil {
   155  		return nil, fmt.Errorf("malformed profile: %v", err)
   156  	}
   157  	return p, nil
   158  }
   159  
   160  var errUnrecognized = fmt.Errorf("unrecognized profile format")
   161  var errMalformed = fmt.Errorf("malformed profile format")
   162  var ErrNoData = fmt.Errorf("empty input file")
   163  
   164  func parseLegacy(data []byte) (*Profile, error) {
   165  	parsers := []func([]byte) (*Profile, error){
   166  		parseCPU,
   167  		parseHeap,
   168  		parseGoCount, // goroutine, threadcreate
   169  		parseThread,
   170  		parseContention,
   171  	}
   172  
   173  	for _, parser := range parsers {
   174  		p, err := parser(data)
   175  		if err == nil {
   176  			p.setMain()
   177  			p.addLegacyFrameInfo()
   178  			return p, nil
   179  		}
   180  		if err != errUnrecognized {
   181  			return nil, err
   182  		}
   183  	}
   184  	return nil, errUnrecognized
   185  }
   186  
   187  func parseUncompressed(data []byte) (*Profile, error) {
   188  	if len(data) == 0 {
   189  		return nil, ErrNoData
   190  	}
   191  
   192  	p := &Profile{}
   193  	if err := unmarshal(data, p); err != nil {
   194  		return nil, err
   195  	}
   196  
   197  	if err := p.postDecode(); err != nil {
   198  		return nil, err
   199  	}
   200  
   201  	return p, nil
   202  }
   203  
// libRx matches file names that end in ".so", or that contain ".so"
// followed by '.' or '_' and at least one digit (for example
// "libc.so.6"). setMain uses it to skip such mappings when guessing
// which mapping is the main binary.
var libRx = lazyregexp.New(`([.]so$|[.]so[._][0-9]+)`)
   205  
   206  // setMain scans Mapping entries and guesses which entry is main
   207  // because legacy profiles don't obey the convention of putting main
   208  // first.
   209  func (p *Profile) setMain() {
   210  	for i := 0; i < len(p.Mapping); i++ {
   211  		file := strings.TrimSpace(strings.ReplaceAll(p.Mapping[i].File, "(deleted)", ""))
   212  		if len(file) == 0 {
   213  			continue
   214  		}
   215  		if len(libRx.FindStringSubmatch(file)) > 0 {
   216  			continue
   217  		}
   218  		if strings.HasPrefix(file, "[") {
   219  			continue
   220  		}
   221  		// Swap what we guess is main to position 0.
   222  		p.Mapping[i], p.Mapping[0] = p.Mapping[0], p.Mapping[i]
   223  		break
   224  	}
   225  }
   226  
   227  // Write writes the profile as a gzip-compressed marshaled protobuf.
   228  func (p *Profile) Write(w io.Writer) error {
   229  	p.preEncode()
   230  	b := marshal(p)
   231  	zw := gzip.NewWriter(w)
   232  	defer zw.Close()
   233  	_, err := zw.Write(b)
   234  	return err
   235  }
   236  
   237  // CheckValid tests whether the profile is valid. Checks include, but are
   238  // not limited to:
   239  //   - len(Profile.Sample[n].value) == len(Profile.value_unit)
   240  //   - Sample.id has a corresponding Profile.Location
   241  func (p *Profile) CheckValid() error {
   242  	// Check that sample values are consistent
   243  	sampleLen := len(p.SampleType)
   244  	if sampleLen == 0 && len(p.Sample) != 0 {
   245  		return fmt.Errorf("missing sample type information")
   246  	}
   247  	for _, s := range p.Sample {
   248  		if len(s.Value) != sampleLen {
   249  			return fmt.Errorf("mismatch: sample has: %d values vs. %d types", len(s.Value), len(p.SampleType))
   250  		}
   251  	}
   252  
   253  	// Check that all mappings/locations/functions are in the tables
   254  	// Check that there are no duplicate ids
   255  	mappings := make(map[uint64]*Mapping, len(p.Mapping))
   256  	for _, m := range p.Mapping {
   257  		if m.ID == 0 {
   258  			return fmt.Errorf("found mapping with reserved ID=0")
   259  		}
   260  		if mappings[m.ID] != nil {
   261  			return fmt.Errorf("multiple mappings with same id: %d", m.ID)
   262  		}
   263  		mappings[m.ID] = m
   264  	}
   265  	functions := make(map[uint64]*Function, len(p.Function))
   266  	for _, f := range p.Function {
   267  		if f.ID == 0 {
   268  			return fmt.Errorf("found function with reserved ID=0")
   269  		}
   270  		if functions[f.ID] != nil {
   271  			return fmt.Errorf("multiple functions with same id: %d", f.ID)
   272  		}
   273  		functions[f.ID] = f
   274  	}
   275  	locations := make(map[uint64]*Location, len(p.Location))
   276  	for _, l := range p.Location {
   277  		if l.ID == 0 {
   278  			return fmt.Errorf("found location with reserved id=0")
   279  		}
   280  		if locations[l.ID] != nil {
   281  			return fmt.Errorf("multiple locations with same id: %d", l.ID)
   282  		}
   283  		locations[l.ID] = l
   284  		if m := l.Mapping; m != nil {
   285  			if m.ID == 0 || mappings[m.ID] != m {
   286  				return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID)
   287  			}
   288  		}
   289  		for _, ln := range l.Line {
   290  			if f := ln.Function; f != nil {
   291  				if f.ID == 0 || functions[f.ID] != f {
   292  					return fmt.Errorf("inconsistent function %p: %d", f, f.ID)
   293  				}
   294  			}
   295  		}
   296  	}
   297  	return nil
   298  }
   299  
   300  // Aggregate merges the locations in the profile into equivalence
   301  // classes preserving the request attributes. It also updates the
   302  // samples to point to the merged locations.
   303  func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address bool) error {
   304  	for _, m := range p.Mapping {
   305  		m.HasInlineFrames = m.HasInlineFrames && inlineFrame
   306  		m.HasFunctions = m.HasFunctions && function
   307  		m.HasFilenames = m.HasFilenames && filename
   308  		m.HasLineNumbers = m.HasLineNumbers && linenumber
   309  	}
   310  
   311  	// Aggregate functions
   312  	if !function || !filename {
   313  		for _, f := range p.Function {
   314  			if !function {
   315  				f.Name = ""
   316  				f.SystemName = ""
   317  			}
   318  			if !filename {
   319  				f.Filename = ""
   320  			}
   321  		}
   322  	}
   323  
   324  	// Aggregate locations
   325  	if !inlineFrame || !address || !linenumber {
   326  		for _, l := range p.Location {
   327  			if !inlineFrame && len(l.Line) > 1 {
   328  				l.Line = l.Line[len(l.Line)-1:]
   329  			}
   330  			if !linenumber {
   331  				for i := range l.Line {
   332  					l.Line[i].Line = 0
   333  				}
   334  			}
   335  			if !address {
   336  				l.Address = 0
   337  			}
   338  		}
   339  	}
   340  
   341  	return p.CheckValid()
   342  }
   343  
   344  // Print dumps a text representation of a profile. Intended mainly
   345  // for debugging purposes.
   346  func (p *Profile) String() string {
   347  
   348  	ss := make([]string, 0, len(p.Sample)+len(p.Mapping)+len(p.Location))
   349  	if pt := p.PeriodType; pt != nil {
   350  		ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit))
   351  	}
   352  	ss = append(ss, fmt.Sprintf("Period: %d", p.Period))
   353  	if p.TimeNanos != 0 {
   354  		ss = append(ss, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos)))
   355  	}
   356  	if p.DurationNanos != 0 {
   357  		ss = append(ss, fmt.Sprintf("Duration: %v", time.Duration(p.DurationNanos)))
   358  	}
   359  
   360  	ss = append(ss, "Samples:")
   361  	var sh1 string
   362  	for _, s := range p.SampleType {
   363  		sh1 = sh1 + fmt.Sprintf("%s/%s ", s.Type, s.Unit)
   364  	}
   365  	ss = append(ss, strings.TrimSpace(sh1))
   366  	for _, s := range p.Sample {
   367  		var sv string
   368  		for _, v := range s.Value {
   369  			sv = fmt.Sprintf("%s %10d", sv, v)
   370  		}
   371  		sv = sv + ": "
   372  		for _, l := range s.Location {
   373  			sv = sv + fmt.Sprintf("%d ", l.ID)
   374  		}
   375  		ss = append(ss, sv)
   376  		const labelHeader = "                "
   377  		if len(s.Label) > 0 {
   378  			ls := labelHeader
   379  			for k, v := range s.Label {
   380  				ls = ls + fmt.Sprintf("%s:%v ", k, v)
   381  			}
   382  			ss = append(ss, ls)
   383  		}
   384  		if len(s.NumLabel) > 0 {
   385  			ls := labelHeader
   386  			for k, v := range s.NumLabel {
   387  				ls = ls + fmt.Sprintf("%s:%v ", k, v)
   388  			}
   389  			ss = append(ss, ls)
   390  		}
   391  	}
   392  
   393  	ss = append(ss, "Locations")
   394  	for _, l := range p.Location {
   395  		locStr := fmt.Sprintf("%6d: %#x ", l.ID, l.Address)
   396  		if m := l.Mapping; m != nil {
   397  			locStr = locStr + fmt.Sprintf("M=%d ", m.ID)
   398  		}
   399  		if len(l.Line) == 0 {
   400  			ss = append(ss, locStr)
   401  		}
   402  		for li := range l.Line {
   403  			lnStr := "??"
   404  			if fn := l.Line[li].Function; fn != nil {
   405  				lnStr = fmt.Sprintf("%s %s:%d s=%d",
   406  					fn.Name,
   407  					fn.Filename,
   408  					l.Line[li].Line,
   409  					fn.StartLine)
   410  				if fn.Name != fn.SystemName {
   411  					lnStr = lnStr + "(" + fn.SystemName + ")"
   412  				}
   413  			}
   414  			ss = append(ss, locStr+lnStr)
   415  			// Do not print location details past the first line
   416  			locStr = "             "
   417  		}
   418  	}
   419  
   420  	ss = append(ss, "Mappings")
   421  	for _, m := range p.Mapping {
   422  		bits := ""
   423  		if m.HasFunctions {
   424  			bits += "[FN]"
   425  		}
   426  		if m.HasFilenames {
   427  			bits += "[FL]"
   428  		}
   429  		if m.HasLineNumbers {
   430  			bits += "[LN]"
   431  		}
   432  		if m.HasInlineFrames {
   433  			bits += "[IN]"
   434  		}
   435  		ss = append(ss, fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s",
   436  			m.ID,
   437  			m.Start, m.Limit, m.Offset,
   438  			m.File,
   439  			m.BuildID,
   440  			bits))
   441  	}
   442  
   443  	return strings.Join(ss, "\n") + "\n"
   444  }
   445  
   446  // Merge adds profile p adjusted by ratio r into profile p. Profiles
   447  // must be compatible (same Type and SampleType).
   448  // TODO(rsilvera): consider normalizing the profiles based on the
   449  // total samples collected.
   450  func (p *Profile) Merge(pb *Profile, r float64) error {
   451  	if err := p.Compatible(pb); err != nil {
   452  		return err
   453  	}
   454  
   455  	pb = pb.Copy()
   456  
   457  	// Keep the largest of the two periods.
   458  	if pb.Period > p.Period {
   459  		p.Period = pb.Period
   460  	}
   461  
   462  	p.DurationNanos += pb.DurationNanos
   463  
   464  	p.Mapping = append(p.Mapping, pb.Mapping...)
   465  	for i, m := range p.Mapping {
   466  		m.ID = uint64(i + 1)
   467  	}
   468  	p.Location = append(p.Location, pb.Location...)
   469  	for i, l := range p.Location {
   470  		l.ID = uint64(i + 1)
   471  	}
   472  	p.Function = append(p.Function, pb.Function...)
   473  	for i, f := range p.Function {
   474  		f.ID = uint64(i + 1)
   475  	}
   476  
   477  	if r != 1.0 {
   478  		for _, s := range pb.Sample {
   479  			for i, v := range s.Value {
   480  				s.Value[i] = int64((float64(v) * r))
   481  			}
   482  		}
   483  	}
   484  	p.Sample = append(p.Sample, pb.Sample...)
   485  	return p.CheckValid()
   486  }
   487  
   488  // Compatible determines if two profiles can be compared/merged.
   489  // returns nil if the profiles are compatible; otherwise an error with
   490  // details on the incompatibility.
   491  func (p *Profile) Compatible(pb *Profile) error {
   492  	if !compatibleValueTypes(p.PeriodType, pb.PeriodType) {
   493  		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
   494  	}
   495  
   496  	if len(p.SampleType) != len(pb.SampleType) {
   497  		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
   498  	}
   499  
   500  	for i := range p.SampleType {
   501  		if !compatibleValueTypes(p.SampleType[i], pb.SampleType[i]) {
   502  			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
   503  		}
   504  	}
   505  
   506  	return nil
   507  }
   508  
   509  // HasFunctions determines if all locations in this profile have
   510  // symbolized function information.
   511  func (p *Profile) HasFunctions() bool {
   512  	for _, l := range p.Location {
   513  		if l.Mapping == nil || !l.Mapping.HasFunctions {
   514  			return false
   515  		}
   516  	}
   517  	return true
   518  }
   519  
   520  // HasFileLines determines if all locations in this profile have
   521  // symbolized file and line number information.
   522  func (p *Profile) HasFileLines() bool {
   523  	for _, l := range p.Location {
   524  		if l.Mapping == nil || (!l.Mapping.HasFilenames || !l.Mapping.HasLineNumbers) {
   525  			return false
   526  		}
   527  	}
   528  	return true
   529  }
   530  
   531  func compatibleValueTypes(v1, v2 *ValueType) bool {
   532  	if v1 == nil || v2 == nil {
   533  		return true // No grounds to disqualify.
   534  	}
   535  	return v1.Type == v2.Type && v1.Unit == v2.Unit
   536  }
   537  
   538  // Copy makes a fully independent copy of a profile.
   539  func (p *Profile) Copy() *Profile {
   540  	p.preEncode()
   541  	b := marshal(p)
   542  
   543  	pp := &Profile{}
   544  	if err := unmarshal(b, pp); err != nil {
   545  		panic(err)
   546  	}
   547  	if err := pp.postDecode(); err != nil {
   548  		panic(err)
   549  	}
   550  
   551  	return pp
   552  }
   553  
// Demangler maps symbol names to a human-readable form. This may
// include C++ demangling and additional simplification. Names that
// are not demangled may be missing from the resulting map.
//
// The returned map is keyed by the input name. Profile.Demangle passes
// in the SystemName of every function and looks results up by that
// same name.
type Demangler func(name []string) (map[string]string, error)
   558  
   559  // Demangle attempts to demangle and optionally simplify any function
   560  // names referenced in the profile. It works on a best-effort basis:
   561  // it will silently preserve the original names in case of any errors.
   562  func (p *Profile) Demangle(d Demangler) error {
   563  	// Collect names to demangle.
   564  	var names []string
   565  	for _, fn := range p.Function {
   566  		names = append(names, fn.SystemName)
   567  	}
   568  
   569  	// Update profile with demangled names.
   570  	demangled, err := d(names)
   571  	if err != nil {
   572  		return err
   573  	}
   574  	for _, fn := range p.Function {
   575  		if dd, ok := demangled[fn.SystemName]; ok {
   576  			fn.Name = dd
   577  		}
   578  	}
   579  	return nil
   580  }
   581  
// Empty reports whether the profile contains no samples.
// Only p.Sample is consulted; mappings, locations and functions are
// not taken into account.
func (p *Profile) Empty() bool {
	return len(p.Sample) == 0
}
   586  
   587  // Scale multiplies all sample values in a profile by a constant.
   588  func (p *Profile) Scale(ratio float64) {
   589  	if ratio == 1 {
   590  		return
   591  	}
   592  	ratios := make([]float64, len(p.SampleType))
   593  	for i := range p.SampleType {
   594  		ratios[i] = ratio
   595  	}
   596  	p.ScaleN(ratios)
   597  }
   598  
   599  // ScaleN multiplies each sample values in a sample by a different amount.
   600  func (p *Profile) ScaleN(ratios []float64) error {
   601  	if len(p.SampleType) != len(ratios) {
   602  		return fmt.Errorf("mismatched scale ratios, got %d, want %d", len(ratios), len(p.SampleType))
   603  	}
   604  	allOnes := true
   605  	for _, r := range ratios {
   606  		if r != 1 {
   607  			allOnes = false
   608  			break
   609  		}
   610  	}
   611  	if allOnes {
   612  		return nil
   613  	}
   614  	for _, s := range p.Sample {
   615  		for i, v := range s.Value {
   616  			if ratios[i] != 1 {
   617  				s.Value[i] = int64(float64(v) * ratios[i])
   618  			}
   619  		}
   620  	}
   621  	return nil
   622  }
   623  

View as plain text