// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file implements parsers to convert legacy profiles into the
// profile.proto format.

package profile

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"math"
	"regexp"
	"strconv"
	"strings"
)

var (
	// Go count profiles: countStartRE matches the header line
	// ("<type> profile: total <n>") and countRE matches one sample line
	// ("<count> @ 0x<addr> 0x<addr> ...").
	countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)

	// Heap profiles: heapHeaderRE matches the header line, capturing two
	// count/size pairs, the profile flavor ("heap...") and an optional
	// trailing number that parseHeapHeader uses as the sampling period.
	// heapSampleRE matches one sample line: an in-use count/size pair, a
	// bracketed allocation count/size pair and the stack addresses.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// contentionSampleRE matches one contention sample line: two numbers
	// followed by "@" and the stack addresses.
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// hexNumberRE matches a single lower-case, 0x-prefixed hex number.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// growthHeaderRE matches the header line of a growthz profile.
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)

	// fragmentationHeaderRE matches the header line of a fragmentationz
	// profile.
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)

	// threadzStartRE matches the header of a threadz dump; threadStartRE
	// matches the header that introduces the stack of one thread.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
	// Recommended format:
	// Start   End     object file name     offset(optional)   linker build id
	// 0x40000-0x80000 /path/to/binary      (@FF00)            abc123456
	spaceDigits = `\s+[[:digit:]]+`
	hexPair     = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
	oSpace      = `\s*`
	// Capturing expressions.
	cHex           = `(?:0x)?([[:xdigit:]]+)`
	cHexRange      = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
	cSpaceString   = `(?:\s+(\S+))?`
	cSpaceHex      = `(?:\s+([[:xdigit:]]+))?`
	cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
	cPerm          = `(?:\s+([-rwxp]+))?`

	// procMapsRE captures start, end, permissions, offset and file name;
	// briefMapsRE captures start, end, permissions, file name, offset
	// and build ID (see parseMappingEntry for how the groups are used).
	procMapsRE  = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
	briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)

	// Regular expression to parse log data, of the form:
	// ... file:line] msg...
	logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
)

// isSpaceOrComment reports whether line is empty, consists only of
// white space, or has '#' as its first non-space character.
func isSpaceOrComment(line string) bool {
	trimmed := strings.TrimSpace(line)
	return len(trimmed) == 0 || trimmed[0] == '#'
}

// parseGoCount parses a Go count profile (e.g., threadcreate or
// goroutine) and returns a new Profile.
//
// It returns errUnrecognized if the first meaningful line is not a
// count profile header, and errMalformed if a sample line cannot be
// parsed. Sample lines are read until a line starting with "---" or
// the end of the input; whatever follows is handed to
// parseAdditionalSections.
func parseGoCount(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	// Skip comments at the beginning of the file.
	for s.Scan() && isSpaceOrComment(s.Text()) {
	}
	if err := s.Err(); err != nil {
		return nil, err
	}
	m := countStartRE.FindStringSubmatch(s.Text())
	if m == nil {
		return nil, errUnrecognized
	}
	// The profile name from the header is used as the sample type.
	profileType := m[1]
	p := &Profile{
		PeriodType: &ValueType{Type: profileType, Unit: "count"},
		Period:     1,
		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
	}
	// locations holds one Location per distinct (adjusted) address so
	// that samples sharing a frame also share the Location.
	locations := make(map[uint64]*Location)
	for s.Scan() {
		line := s.Text()
		if isSpaceOrComment(line) {
			continue
		}
		if strings.HasPrefix(line, "---") {
			break
		}
		m := countRE.FindStringSubmatch(line)
		if m == nil {
			return nil, errMalformed
		}
		n, err := strconv.ParseInt(m[1], 0, 64)
		if err != nil {
			return nil, errMalformed
		}
		fields := strings.Fields(m[2])
		locs := make([]*Location, 0, len(fields))
		for _, stk := range fields {
			addr, err := strconv.ParseUint(stk, 0, 64)
			if err != nil {
				return nil, errMalformed
			}
			// Adjust all frames by -1 to land on top of the call instruction.
			addr--
			loc := locations[addr]
			if loc == nil {
				loc = &Location{
					Address: addr,
				}
				locations[addr] = loc
				p.Location = append(p.Location, loc)
			}
			locs = append(locs, loc)
		}
		p.Sample = append(p.Sample, &Sample{
			Location: locs,
			Value:    []int64{n},
		})
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}
	return p, nil
}

// remapLocationIDs rebuilds p.Location so that it holds exactly the
// locations referenced by the samples, in order of first reference,
// and numbers them sequentially starting at 1. Locations that no
// sample refers to are dropped from p.Location.
func (p *Profile) remapLocationIDs() {
	seen := make(map[*Location]bool, len(p.Location))
	var locs []*Location

	for _, s := range p.Sample {
		for _, l := range s.Location {
			if seen[l] {
				continue
			}
			l.ID = uint64(len(locs) + 1)
			locs = append(locs, l)
			seen[l] = true
		}
	}
	p.Location = locs
}

// remapFunctionIDs rebuilds p.Function so that it holds exactly the
// functions referenced by the lines of p.Location, in order of first
// reference, and numbers them sequentially starting at 1.
func (p *Profile) remapFunctionIDs() {
	seen := make(map[*Function]bool, len(p.Function))
	var fns []*Function

	for _, l := range p.Location {
		for _, ln := range l.Line {
			fn := ln.Function
			if fn == nil || seen[fn] {
				continue
			}
			fn.ID = uint64(len(fns) + 1)
			fns = append(fns, fn)
			seen[fn] = true
		}
	}
	p.Function = fns
}

// remapMappingIDs matches location addresses with existing mappings
// and updates them appropriately. This is O(N*M), if this ever shows
// up as a bottleneck, evaluate sorting the mappings and doing a
// binary search, which would make it O(N*log(M)).
//
// Besides assigning l.Mapping, it may drop or adjust mappings (see
// the heuristics below), appends a catch-all mapping when a location
// matches nothing, and finally renumbers all mapping IDs from 1.
func (p *Profile) remapMappingIDs() {
	// Some profile handlers will incorrectly set regions for the main
	// executable if its section is remapped. Fix them through heuristics.

	if len(p.Mapping) > 0 {
		// Remove the initial mapping if named '/anon_hugepage' and has a
		// consecutive adjacent mapping.
		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
				p.Mapping = p.Mapping[1:]
			}
		}
	}

	// Subtract the offset from the start of the main mapping if it
	// ends up at a recognizable start address.
	if len(p.Mapping) > 0 {
		const expectedStart = 0x400000
		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
			m.Start = expectedStart
			m.Offset = 0
		}
	}

	// Associate each location with an address to the corresponding
	// mapping. Create fake mapping if a suitable one isn't found.
	var fake *Mapping
nextLocation:
	for _, l := range p.Location {
		a := l.Address
		// Locations that already have a mapping, or no address, are
		// left untouched.
		if l.Mapping != nil || a == 0 {
			continue
		}
		for _, m := range p.Mapping {
			if m.Start <= a && a < m.Limit {
				l.Mapping = m
				continue nextLocation
			}
		}
		// Work around legacy handlers failing to encode the first
		// part of mappings split into adjacent ranges.
		for _, m := range p.Mapping {
			if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
				// Extend the mapping downwards so that it covers a.
				m.Start -= m.Offset
				m.Offset = 0
				l.Mapping = m
				continue nextLocation
			}
		}
		// If there is still no mapping, create a fake one.
		// This is important for the Go legacy handler, which produced
		// no mappings.
		if fake == nil {
			// The fake mapping spans the whole address space; its ID
			// is overwritten by the renumbering below.
			fake = &Mapping{
				ID:    1,
				Limit: ^uint64(0),
			}
			p.Mapping = append(p.Mapping, fake)
		}
		l.Mapping = fake
	}

	// Reset all mapping IDs.
	for i, m := range p.Mapping {
		m.ID = uint64(i + 1)
	}
}

// cpuInts lists the word decoders that parseCPU tries, in order, to
// detect the word size and endianness of a profilez header.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}

// get32l decodes a little-endian 32-bit word from the start of b and
// returns it together with the remaining bytes. It returns (0, nil)
// if b holds fewer than 4 bytes.
func get32l(b []byte) (uint64, []byte) {
	if len(b) < 4 {
		return 0, nil
	}
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
}

// get32b decodes a big-endian 32-bit word from the start of b and
// returns it together with the remaining bytes. It returns (0, nil)
// if b holds fewer than 4 bytes.
func get32b(b []byte) (uint64, []byte) {
	if len(b) < 4 {
		return 0, nil
	}
	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
}

// get64l decodes a little-endian 64-bit word from the start of b and
// returns it together with the remaining bytes. It returns (0, nil)
// if b holds fewer than 8 bytes.
func get64l(b []byte) (uint64, []byte) {
	if len(b) < 8 {
		return 0, nil
	}
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
}

// get64b decodes a big-endian 64-bit word from the start of b and
// returns it together with the remaining bytes. It returns (0, nil)
// if b holds fewer than 8 bytes.
func get64b(b []byte) (uint64, []byte) {
	if len(b) < 8 {
		return 0, nil
	}
	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
}

// parseCPU parses a profilez legacy profile and returns a newly
// populated Profile.
//
// The general format for profilez samples is a sequence of words in
// binary format. The first words are a header with the following data:
//
//	1st word -- 0
//	2nd word -- 3
//	3rd word -- 0 if a c++ application, 1 if a java application.
//	4th word -- Sampling period (in microseconds).
//	5th word -- Padding.
//
// The header is decoded with each entry of cpuInts in turn; the first
// decoder under which it is well formed is used for the rest of the
// profile. errUnrecognized is returned if none of them fits.
func parseCPU(b []byte) (*Profile, error) {
	var parse func([]byte) (uint64, []byte)
	var n1, n2, n3, n4, n5 uint64
	for _, parse = range cpuInts {
		var tmp []byte
		n1, tmp = parse(b)
		n2, tmp = parse(tmp)
		n3, tmp = parse(tmp)
		n4, tmp = parse(tmp)
		n5, tmp = parse(tmp)

		// tmp is nil when the input was too short for five words.
		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
			b = tmp
			return cpuProfile(b, int64(n4), parse)
		}
		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
			b = tmp
			return javaCPUProfile(b, int64(n4), parse)
		}
	}
	return nil, errUnrecognized
}

// cpuProfile returns a new Profile from C++ profilez data.
// b is the profile bytes after the header, period is the profiling // period, and parse is a function to parse 8-byte chunks from the // profile in its native endianness. func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) { p := &Profile{ Period: period * 1000, PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"}, SampleType: []*ValueType{ {Type: "samples", Unit: "count"}, {Type: "cpu", Unit: "nanoseconds"}, }, } var err error if b, _, err = parseCPUSamples(b, parse, true, p); err != nil { return nil, err } // If *most* samples have the same second-to-the-bottom frame, it // strongly suggests that it is an uninteresting artifact of // measurement -- a stack frame pushed by the signal handler. The // bottom frame is always correct as it is picked up from the signal // structure, not the stack. Check if this is the case and if so, // remove. // Remove up to two frames. maxiter := 2 // Allow one different sample for this many samples with the same // second-to-last frame. similarSamples := 32 margin := len(p.Sample) / similarSamples for iter := 0; iter < maxiter; iter++ { addr1 := make(map[uint64]int) for _, s := range p.Sample { if len(s.Location) > 1 { a := s.Location[1].Address addr1[a] = addr1[a] + 1 } } for id1, count := range addr1 { if count >= len(p.Sample)-margin { // Found uninteresting frame, strip it out from all samples for _, s := range p.Sample { if len(s.Location) > 1 && s.Location[1].Address == id1 { s.Location = append(s.Location[:1], s.Location[2:]...) } } break } } } if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil { return nil, err } cleanupDuplicateLocations(p) return p, nil } func cleanupDuplicateLocations(p *Profile) { // The profile handler may duplicate the leaf frame, because it gets // its address both from stack unwinding and from the signal // context. Detect this and delete the duplicate, which has been // adjusted by -1. 
The leaf address should not be adjusted as it is // not a call. for _, s := range p.Sample { if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 { s.Location = append(s.Location[:1], s.Location[2:]...) } } } // parseCPUSamples parses a collection of profilez samples from a // profile. // // profilez samples are a repeated sequence of stack frames of the // form: // // 1st word -- The number of times this stack was encountered. // 2nd word -- The size of the stack (StackSize). // 3rd word -- The first address on the stack. // ... // StackSize + 2 -- The last address on the stack // // The last stack trace is of the form: // // 1st word -- 0 // 2nd word -- 1 // 3rd word -- 0 // // Addresses from stack traces may point to the next instruction after // each call. Optionally adjust by -1 to land somewhere on the actual // call (except for the leaf, which is not a call). func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) { locs := make(map[uint64]*Location) for len(b) > 0 { var count, nstk uint64 count, b = parse(b) nstk, b = parse(b) if b == nil || nstk > uint64(len(b)/4) { return nil, nil, errUnrecognized } var sloc []*Location addrs := make([]uint64, nstk) for i := 0; i < int(nstk); i++ { addrs[i], b = parse(b) } if count == 0 && nstk == 1 && addrs[0] == 0 { // End of data marker break } for i, addr := range addrs { if adjust && i > 0 { addr-- } loc := locs[addr] if loc == nil { loc = &Location{ Address: addr, } locs[addr] = loc p.Location = append(p.Location, loc) } sloc = append(sloc, loc) } p.Sample = append(p.Sample, &Sample{ Value: []int64{int64(count), int64(count) * p.Period}, Location: sloc, }) } // Reached the end without finding the EOD marker. return b, locs, nil } // parseHeap parses a heapz legacy or a growthz profile and // returns a newly populated Profile. 
func parseHeap(b []byte) (p *Profile, err error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	if !s.Scan() {
		if err := s.Err(); err != nil {
			return nil, err
		}
		return nil, errUnrecognized
	}
	p = &Profile{}

	sampling := ""
	hasAlloc := false

	// The first line must be one of the known headers; only the heap
	// header carries a sampling mode, a period and allocation data.
	line := s.Text()
	p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
	if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
		sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
		if err != nil {
			return nil, err
		}
	} else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
		p.Period = 1
	} else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
		p.Period = 1
	} else {
		return nil, errUnrecognized
	}

	if hasAlloc {
		// Put alloc before inuse so that default pprof selection
		// will prefer inuse_space.
		p.SampleType = []*ValueType{
			{Type: "alloc_objects", Unit: "count"},
			{Type: "alloc_space", Unit: "bytes"},
			{Type: "inuse_objects", Unit: "count"},
			{Type: "inuse_space", Unit: "bytes"},
		}
	} else {
		p.SampleType = []*ValueType{
			{Type: "objects", Unit: "count"},
			{Type: "space", Unit: "bytes"},
		}
	}

	// locs holds one Location per distinct (adjusted) address so that
	// samples sharing a frame also share the Location.
	locs := make(map[uint64]*Location)
	for s.Scan() {
		line := strings.TrimSpace(s.Text())

		if isSpaceOrComment(line) {
			continue
		}

		// The samples end where the memory map begins.
		if isMemoryMapSentinel(line) {
			break
		}

		value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
		if err != nil {
			return nil, err
		}

		var sloc []*Location
		for _, addr := range addrs {
			// Addresses from stack traces point to the next instruction after
			// each call. Adjust by -1 to land somewhere on the actual call.
			addr--
			loc := locs[addr]
			if locs[addr] == nil {
				loc = &Location{
					Address: addr,
				}
				p.Location = append(p.Location, loc)
				locs[addr] = loc
			}
			sloc = append(sloc, loc)
		}

		p.Sample = append(p.Sample, &Sample{
			Value:    value,
			Location: sloc,
			NumLabel: map[string][]int64{"bytes": {blocksize}},
		})
	}
	if err := s.Err(); err != nil {
		return nil, err
	}
	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}
	return p, nil
}

// parseHeapHeader parses the header line of a heap profile. It returns
// the sampling mode ("v2" or ""), the sampling period, and whether the
// header shows allocation totals that differ from the in-use totals.
// It returns errUnrecognized if the line is not a heap header, its
// period is not a valid number, or the profile flavor is unknown.
func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
	header := heapHeaderRE.FindStringSubmatch(line)
	if header == nil {
		return "", 0, false, errUnrecognized
	}

	// The period is optional in the header; it stays 0 when absent.
	if len(header[6]) > 0 {
		if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
			return "", 0, false, errUnrecognized
		}
	}

	// Allocation data is reported only if a bracketed total is neither
	// zero nor a copy of the corresponding leading total.
	if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
		hasAlloc = true
	}

	switch header[5] {
	case "heapz_v2", "heap_v2":
		return "v2", period, hasAlloc, nil
	case "heapprofile":
		return "", 1, hasAlloc, nil
	case "heap":
		// NOTE(review): the period is halved for this flavor; the reason
		// is not visible in this file -- confirm against the producer.
		return "v2", period / 2, hasAlloc, nil
	default:
		return "", 0, false, errUnrecognized
	}
}

// parseHeapSample parses a single row from a heap profile into a new Sample.
//
// value holds count/size pairs: the allocation pair first when
// includeAlloc is set, then the in-use pair. blocksize is the size
// divided by the count of the last pair with a non-zero count, computed
// before any scaling. addrs holds the stack addresses as they appear
// in the row.
func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
	sampleData := heapSampleRE.FindStringSubmatch(line)
	if len(sampleData) != 6 {
		return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
	}

	// This is a local-scoped helper function to avoid needing to pass
	// around rate, sampling and many return parameters.
	addValues := func(countString, sizeString string, label string) error {
		count, err := strconv.ParseInt(countString, 10, 64)
		if err != nil {
			return fmt.Errorf("malformed sample: %s: %v", line, err)
		}
		size, err := strconv.ParseInt(sizeString, 10, 64)
		if err != nil {
			return fmt.Errorf("malformed sample: %s: %v", line, err)
		}
		if count == 0 && size != 0 {
			return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
		}
		if count != 0 {
			blocksize = size / count
			// Only "v2" profiles are scaled up to estimate unsampled values.
			if sampling == "v2" {
				count, size = scaleHeapSample(count, size, rate)
			}
		}
		value = append(value, count, size)
		return nil
	}

	// Groups 3 and 4 are the bracketed (allocation) pair, groups 1 and 2
	// the leading (in-use) pair.
	if includeAlloc {
		if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
			return nil, 0, nil, err
		}
	}

	if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
		return nil, 0, nil, err
	}

	addrs, err = parseHexAddresses(sampleData[5])
	if err != nil {
		return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}

	return value, blocksize, addrs, nil
}

// parseHexAddresses extracts hex numbers from a string, attempts to convert
// each to an unsigned 64-bit number and returns the resulting numbers as a
// slice, or an error if the string contains hex numbers which are too large to
// handle (which means a malformed profile).
//
// Only lower-case numbers with a "0x" prefix are recognized; any other
// text in the string is ignored.
func parseHexAddresses(s string) ([]uint64, error) {
	hexStrings := hexNumberRE.FindAllString(s, -1)
	var addrs []uint64
	for _, s := range hexStrings {
		if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
			addrs = append(addrs, addr)
		} else {
			return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
		}
	}
	return addrs, nil
}

// scaleHeapSample adjusts the data from a heapz Sample to
// account for its probability of appearing in the collected
// data. heapz profiles are a sampling of the memory allocations
// requests in a program. We estimate the unsampled value by dividing
// each collected sample by its probability of appearing in the
// profile. heapz v2 profiles rely on a poisson process to determine
// which samples to collect, based on the desired average collection
// rate R. The probability of a sample of size S to appear in that
// profile is 1-exp(-S/R).
//
// S is taken to be the average size, size/count. The scaled count and
// size are returned truncated to integers.
func scaleHeapSample(count, size, rate int64) (int64, int64) {
	if count == 0 || size == 0 {
		return 0, 0
	}

	if rate <= 1 {
		// if rate==1 all samples were collected so no adjustment is needed.
		// if rate<1 treat as unknown and skip scaling.
		return count, size
	}

	avgSize := float64(size) / float64(count)
	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))

	return int64(float64(count) * scale), int64(float64(size) * scale)
}

// parseContention parses a mutex or contention profile. There are 2 cases:
// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
//
// The header line is followed by "attribute = value" lines and then by
// the samples. errUnrecognized is returned for an unknown header, an
// unknown attribute or an attribute value that is not a number.
func parseContention(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	if !s.Scan() {
		if err := s.Err(); err != nil {
			return nil, err
		}
		return nil, errUnrecognized
	}

	switch l := s.Text(); {
	case strings.HasPrefix(l, "--- contentionz "):
	case strings.HasPrefix(l, "--- mutex:"):
	case strings.HasPrefix(l, "--- contention:"):
	default:
		return nil, errUnrecognized
	}

	p := &Profile{
		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
		Period:     1,
		SampleType: []*ValueType{
			{Type: "contentions", Unit: "count"},
			{Type: "delay", Unit: "nanoseconds"},
		},
	}

	var cpuHz int64
	// Parse text of the form "attribute = value" before the samples.
	const delimiter = "="
	for s.Scan() {
		line := s.Text()
		if line = strings.TrimSpace(line); isSpaceOrComment(line) {
			continue
		}
		if strings.HasPrefix(line, "---") {
			break
		}
		// The first line without a delimiter ends the attributes; it is
		// left as the scanner's current line for the sample loop below.
		attr := strings.SplitN(line, delimiter, 2)
		if len(attr) != 2 {
			break
		}
		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
		var err error
		switch key {
		case "cycles/second":
			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
				return nil, errUnrecognized
			}
		case "sampling period":
			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
				return nil, errUnrecognized
			}
		case "ms since reset":
			ms, err := strconv.ParseInt(val, 0, 64)
			if err != nil {
				return nil, errUnrecognized
			}
			p.DurationNanos = ms * 1000 * 1000
		case "format":
			// CPP contentionz profiles don't have format.
			return nil, errUnrecognized
		case "resolution":
			// CPP contentionz profiles don't have resolution.
			return nil, errUnrecognized
		case "discarded samples":
			// Recognized, but the value is not used.
		default:
			return nil, errUnrecognized
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	// locs holds one Location per distinct (adjusted) address.
	locs := make(map[uint64]*Location)
	// The loop starts on the scanner's current line, i.e. the line that
	// ended the attribute loop, and only then advances the scanner.
	for {
		line := strings.TrimSpace(s.Text())
		if strings.HasPrefix(line, "---") {
			break
		}
		if !isSpaceOrComment(line) {
			value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
			if err != nil {
				return nil, err
			}
			var sloc []*Location
			for _, addr := range addrs {
				// Addresses from stack traces point to the next instruction after
				// each call. Adjust by -1 to land somewhere on the actual call.
				addr--
				loc := locs[addr]
				if locs[addr] == nil {
					loc = &Location{
						Address: addr,
					}
					p.Location = append(p.Location, loc)
					locs[addr] = loc
				}
				sloc = append(sloc, loc)
			}
			p.Sample = append(p.Sample, &Sample{
				Value:    value,
				Location: sloc,
			})
		}
		if !s.Scan() {
			break
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	return p, nil
}

// parseContentionSample parses a single row from a contention profile
// into a new Sample.
func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
	sampleData := contentionSampleRE.FindStringSubmatch(line)
	if sampleData == nil {
		return nil, nil, errUnrecognized
	}

	// v1 is the first number of the row (the delay), v2 the second one
	// (the number of contentions).
	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
	if err != nil {
		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}
	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
	if err != nil {
		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}

	// Unsample values if period and cpuHz are available.
	// - Delays are multiplied by the period and divided by the clock
	//   rate in GHz, which turns cycles into nanoseconds. This needs
	//   both period and cpuHz; otherwise the delay is left as read.
	// - Contentions are multiplied by the period.
	if period > 0 {
		if cpuHz > 0 {
			cpuGHz := float64(cpuHz) / 1e9
			v1 = int64(float64(v1) * float64(period) / cpuGHz)
		}
		v2 = v2 * period
	}

	// The values are returned as {contentions, delay}.
	value = []int64{v2, v1}
	addrs, err = parseHexAddresses(sampleData[3])
	if err != nil {
		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}

	return value, addrs, nil
}

// parseThread parses a Threadz profile and returns a new Profile.
//
// The input either starts with a threadz header followed by the
// per-thread stacks, or directly with the header of the first thread.
// Every thread yields one sample with a value of 1; a thread marked as
// having the same stack as the previous one bumps the value of the
// preceding sample instead.
//
// NOTE(review): if the input ends without a memory map sentinel, the
// line handed back by parseThreadSample is not a thread header and
// errUnrecognized is returned -- confirm that this is intended.
func parseThread(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	// Skip past comments and empty lines seeking a real header.
	for s.Scan() && isSpaceOrComment(s.Text()) {
	}

	line := s.Text()
	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
		// Advance over initial comments until first stack trace.
		for s.Scan() {
			if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
				break
			}
		}
	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
		return nil, errUnrecognized
	}

	p := &Profile{
		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
		PeriodType: &ValueType{Type: "thread", Unit: "count"},
		Period:     1,
	}

	// locs holds one Location per distinct (adjusted) address.
	locs := make(map[uint64]*Location)
	// Recognize each thread and populate profile samples.
	for !isMemoryMapSentinel(line) {
		if strings.HasPrefix(line, "---- no stack trace for") {
			break
		}
		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
			return nil, errUnrecognized
		}

		// parseThreadSample consumes the stack of the current thread and
		// hands back the line that ended it, which drives the next
		// iteration.
		var addrs []uint64
		var err error
		line, addrs, err = parseThreadSample(s)
		if err != nil {
			return nil, err
		}
		if len(addrs) == 0 {
			// We got a --same as previous threads--. Bump counters.
			if len(p.Sample) > 0 {
				s := p.Sample[len(p.Sample)-1]
				s.Value[0]++
			}
			continue
		}

		var sloc []*Location
		for i, addr := range addrs {
			// Addresses from stack traces point to the next instruction after
			// each call. Adjust by -1 to land somewhere on the actual call
			// (except for the leaf, which is not a call).
			if i > 0 {
				addr--
			}
			loc := locs[addr]
			if locs[addr] == nil {
				loc = &Location{
					Address: addr,
				}
				p.Location = append(p.Location, loc)
				locs[addr] = loc
			}

			sloc = append(sloc, loc)
		}

		p.Sample = append(p.Sample, &Sample{
			Value:    []int64{1},
			Location: sloc,
		})
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}

	cleanupDuplicateLocations(p)
	return p, nil
}

// parseThreadSample parses a symbolized or unsymbolized stack trace.
// Returns the first line after the traceback, the sample (or nil if
// it hits a 'same-as-previous' marker) and an error.
//
// The traceback ends at the first line starting with "---". If the
// input ends first, the returned line is the last line that was read.
func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
	var line string
	sameAsPrevious := false
	for s.Scan() {
		line = strings.TrimSpace(s.Text())
		if line == "" {
			continue
		}

		if strings.HasPrefix(line, "---") {
			break
		}
		if strings.Contains(line, "same as previous thread") {
			sameAsPrevious = true
			continue
		}

		// Collect every hex number on the line; other text is ignored.
		curAddrs, err := parseHexAddresses(line)
		if err != nil {
			return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
		}
		addrs = append(addrs, curAddrs...)
	}
	if err := s.Err(); err != nil {
		return "", nil, err
	}
	if sameAsPrevious {
		// Any addresses that were collected are discarded.
		return line, nil, nil
	}
	return line, addrs, nil
}

// parseAdditionalSections parses any additional sections in the
// profile, ignoring any unrecognized sections.
func parseAdditionalSections(s *bufio.Scanner, p *Profile) error { for !isMemoryMapSentinel(s.Text()) && s.Scan() { } if err := s.Err(); err != nil { return err } return p.ParseMemoryMapFromScanner(s) } // ParseProcMaps parses a memory map in the format of /proc/self/maps. // ParseMemoryMap should be called after setting on a profile to // associate locations to the corresponding mapping based on their // address. func ParseProcMaps(rd io.Reader) ([]*Mapping, error) { s := bufio.NewScanner(rd) return parseProcMapsFromScanner(s) } func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) { var mapping []*Mapping var attrs []string const delimiter = "=" r := strings.NewReplacer() for s.Scan() { line := r.Replace(removeLoggingInfo(s.Text())) m, err := parseMappingEntry(line) if err != nil { if err == errUnrecognized { // Recognize assignments of the form: attr=value, and replace // $attr with value on subsequent mappings. if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 { attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])) r = strings.NewReplacer(attrs...) } // Ignore any unrecognized entries continue } return nil, err } if m == nil { continue } mapping = append(mapping, m) } if err := s.Err(); err != nil { return nil, err } return mapping, nil } // removeLoggingInfo detects and removes log prefix entries generated // by the glog package. If no logging prefix is detected, the string // is returned unmodified. func removeLoggingInfo(line string) string { if match := logInfoRE.FindStringIndex(line); match != nil { return line[match[1]:] } return line } // ParseMemoryMap parses a memory map in the format of // /proc/self/maps, and overrides the mappings in the current profile. // It renumbers the samples and locations in the profile correspondingly. 
func (p *Profile) ParseMemoryMap(rd io.Reader) error {
	scanner := bufio.NewScanner(rd)
	return p.ParseMemoryMapFromScanner(scanner)
}

// ParseMemoryMapFromScanner reads a memory map, in the format of
// /proc/self/maps or one of the supported legacy formats, from s and
// adds the resulting mappings to the current profile. It then
// renumbers the locations, functions and mappings of the profile.
func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
	parsed, err := parseProcMapsFromScanner(s)
	if err != nil {
		return err
	}
	p.Mapping = append(p.Mapping, parsed...)

	p.massageMappings()
	p.remapLocationIDs()
	p.remapFunctionIDs()
	p.remapMappingIDs()
	return nil
}

// parseMappingEntry parses one line of a memory map. It returns
// errUnrecognized if the line matches none of the supported formats
// or holds a number that cannot be parsed, and a nil mapping with a
// nil error if the entry has permissions that do not include "x".
func parseMappingEntry(l string) (*Mapping, error) {
	var start, end, perm, file, offset, buildID string

	// Try the /proc/.../maps layout first, then the brief layout.
	procFields := procMapsRE.FindStringSubmatch(l)
	switch {
	case len(procFields) == 6:
		start, end, perm, offset, file = procFields[1], procFields[2], procFields[3], procFields[4], procFields[5]
	default:
		briefFields := briefMapsRE.FindStringSubmatch(l)
		if len(briefFields) != 7 {
			return nil, errUnrecognized
		}
		start, end, perm, file, offset, buildID = briefFields[1], briefFields[2], briefFields[3], briefFields[4], briefFields[5], briefFields[6]
	}

	// Skip non-executable entries.
	if perm != "" && !strings.Contains(perm, "x") {
		return nil, nil
	}

	startAddr, err := strconv.ParseUint(start, 16, 64)
	if err != nil {
		return nil, errUnrecognized
	}
	limitAddr, err := strconv.ParseUint(end, 16, 64)
	if err != nil {
		return nil, errUnrecognized
	}
	// The offset is optional and defaults to zero.
	var fileOffset uint64
	if offset != "" {
		if fileOffset, err = strconv.ParseUint(offset, 16, 64); err != nil {
			return nil, errUnrecognized
		}
	}

	return &Mapping{
		Start:   startAddr,
		Limit:   limitAddr,
		Offset:  fileOffset,
		File:    file,
		BuildID: buildID,
	}, nil
}

// memoryMapSentinels lists the markers that introduce the memory map
// section of a legacy profile.
var memoryMapSentinels = []string{
	"--- Memory map: ---",
	"MAPPED_LIBRARIES:",
}

// isMemoryMapSentinel returns true if the string contains one of the
// known sentinels for memory map information.
func isMemoryMapSentinel(line string) bool { for _, s := range memoryMapSentinels { if strings.Contains(line, s) { return true } } return false } func (p *Profile) addLegacyFrameInfo() { switch { case isProfileType(p, heapzSampleTypes): p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr case isProfileType(p, contentionzSampleTypes): p.DropFrames, p.KeepFrames = lockRxStr, "" default: p.DropFrames, p.KeepFrames = cpuProfilerRxStr, "" } } var heapzSampleTypes = [][]string{ {"allocations", "size"}, // early Go pprof profiles {"objects", "space"}, {"inuse_objects", "inuse_space"}, {"alloc_objects", "alloc_space"}, {"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"}, // Go pprof legacy profiles } var contentionzSampleTypes = [][]string{ {"contentions", "delay"}, } func isProfileType(p *Profile, types [][]string) bool { st := p.SampleType nextType: for _, t := range types { if len(st) != len(t) { continue } for i := range st { if st[i].Type != t[i] { continue nextType } } return true } return false } var allocRxStr = strings.Join([]string{ // POSIX entry points. `calloc`, `cfree`, `malloc`, `free`, `memalign`, `do_memalign`, `(__)?posix_memalign`, `pvalloc`, `valloc`, `realloc`, // TC malloc. `tcmalloc::.*`, `tc_calloc`, `tc_cfree`, `tc_malloc`, `tc_free`, `tc_memalign`, `tc_posix_memalign`, `tc_pvalloc`, `tc_valloc`, `tc_realloc`, `tc_new`, `tc_delete`, `tc_newarray`, `tc_deletearray`, `tc_new_nothrow`, `tc_newarray_nothrow`, // Memory-allocation routines on OS X. `malloc_zone_malloc`, `malloc_zone_calloc`, `malloc_zone_valloc`, `malloc_zone_realloc`, `malloc_zone_memalign`, `malloc_zone_free`, // Go runtime `runtime\..*`, // Other misc. 
memory allocation routines `BaseArena::.*`, `(::)?do_malloc_no_errno`, `(::)?do_malloc_pages`, `(::)?do_malloc`, `DoSampledAllocation`, `MallocedMemBlock::MallocedMemBlock`, `_M_allocate`, `__builtin_(vec_)?delete`, `__builtin_(vec_)?new`, `__gnu_cxx::new_allocator::allocate`, `__libc_malloc`, `__malloc_alloc_template::allocate`, `allocate`, `cpp_alloc`, `operator new(\[\])?`, `simple_alloc::allocate`, }, `|`) var allocSkipRxStr = strings.Join([]string{ // Preserve Go runtime frames that appear in the middle/bottom of // the stack. `runtime\.panic`, `runtime\.reflectcall`, `runtime\.call[0-9]*`, }, `|`) var cpuProfilerRxStr = strings.Join([]string{ `ProfileData::Add`, `ProfileData::prof_handler`, `CpuProfiler::prof_handler`, `__pthread_sighandler`, `__restore`, }, `|`) var lockRxStr = strings.Join([]string{ `RecordLockProfileData`, `(base::)?RecordLockProfileData.*`, `(base::)?SubmitMutexProfileData.*`, `(base::)?SubmitSpinLockProfileData.*`, `(base::Mutex::)?AwaitCommon.*`, `(base::Mutex::)?Unlock.*`, `(base::Mutex::)?UnlockSlow.*`, `(base::Mutex::)?ReaderUnlock.*`, `(base::MutexLock::)?~MutexLock.*`, `(Mutex::)?AwaitCommon.*`, `(Mutex::)?Unlock.*`, `(Mutex::)?UnlockSlow.*`, `(Mutex::)?ReaderUnlock.*`, `(MutexLock::)?~MutexLock.*`, `(SpinLock::)?Unlock.*`, `(SpinLock::)?SlowUnlock.*`, `(SpinLockHolder::)?~SpinLockHolder.*`, }, `|`)