Source file src/cmd/vendor/github.com/google/pprof/internal/elfexec/elfexec.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package elfexec provides utility routines to examine ELF binaries.
package elfexec

import (
	"bufio"
	"debug/elf"
	"encoding/binary"
	"fmt"
	"io"
)

const (
	maxNoteSize        = 1 << 20 // in bytes
	noteTypeGNUBuildID = 3
)

// elfNote is the payload of a Note Section in an ELF file.
type elfNote struct {
	Name string // Contents of the "name" field, omitting the trailing zero byte.
	Desc []byte // Contents of the "desc" field.
	Type uint32 // Contents of the "type" field.
}

// parseNotes returns the notes from a SHT_NOTE section or PT_NOTE segment.
func parseNotes(reader io.Reader, alignment int, order binary.ByteOrder) ([]elfNote, error) {
	r := bufio.NewReader(reader)

	// padding returns the number of bytes required to pad the given size to an
	// alignment boundary.
	padding := func(size int) int {
		return ((size + (alignment - 1)) &^ (alignment - 1)) - size
	}

	var notes []elfNote
	for {
		noteHeader := make([]byte, 12) // 3 4-byte words
		if _, err := io.ReadFull(r, noteHeader); err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}
		namesz := order.Uint32(noteHeader[0:4])
		descsz := order.Uint32(noteHeader[4:8])
		typ := order.Uint32(noteHeader[8:12])

		if uint64(namesz) > uint64(maxNoteSize) {
			return nil, fmt.Errorf("note name too long (%d bytes)", namesz)
		}
		var name string
		if namesz > 0 {
			// Documentation differs as to whether namesz is meant to include the
			// trailing zero, but everyone agrees that name is null-terminated.
			// So we'll just determine the actual length after the fact.
			var err error
			name, err = r.ReadString('\x00')
			if err == io.EOF {
				return nil, fmt.Errorf("missing note name (want %d bytes)", namesz)
			} else if err != nil {
				return nil, err
			}
			namesz = uint32(len(name))
			name = name[:len(name)-1]
		}

		// Drop padding bytes until the desc field.
		for n := padding(len(noteHeader) + int(namesz)); n > 0; n-- {
			if _, err := r.ReadByte(); err == io.EOF {
				return nil, fmt.Errorf(
					"missing %d bytes of padding after note name", n)
			} else if err != nil {
				return nil, err
			}
		}

		if uint64(descsz) > uint64(maxNoteSize) {
			return nil, fmt.Errorf("note desc too long (%d bytes)", descsz)
		}
		desc := make([]byte, int(descsz))
		if _, err := io.ReadFull(r, desc); err == io.EOF {
			return nil, fmt.Errorf("missing desc (want %d bytes)", len(desc))
		} else if err != nil {
			return nil, err
		}

		notes = append(notes, elfNote{Name: name, Desc: desc, Type: typ})

		// Drop padding bytes until the next note or the end of the section,
		// whichever comes first.
		for n := padding(len(desc)); n > 0; n-- {
			if _, err := r.ReadByte(); err == io.EOF {
				// We hit the end of the section before an alignment boundary.
				// This can happen if this section is at the end of the file or the next
				// section has a smaller alignment requirement.
				break
			} else if err != nil {
				return nil, err
			}
		}
	}
	return notes, nil
}
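
// The following is an illustrative sketch, not part of the upstream pprof
// package: it builds the raw bytes of a single GNU build-ID note in the
// layout parseNotes expects (namesz, descsz and type words, then the name and
// desc fields, each padded to the section alignment) and runs them through
// parseNotes. The 4-byte build ID is made up; real GNU build IDs are
// typically 20 bytes. noteBytesReader exists only so the sketch needs no
// extra imports.
type noteBytesReader struct{ b []byte }

func (r *noteBytesReader) Read(p []byte) (int, error) {
	if len(r.b) == 0 {
		return 0, io.EOF
	}
	n := copy(p, r.b)
	r.b = r.b[n:]
	return n, nil
}

func exampleParseBuildIDNote() ([]elfNote, error) {
	raw := []byte{
		4, 0, 0, 0, // namesz = len("GNU\x00")
		4, 0, 0, 0, // descsz = length of the build ID below
		noteTypeGNUBuildID, 0, 0, 0, // type = NT_GNU_BUILD_ID
		'G', 'N', 'U', 0, // name, already 4-byte aligned
		0xde, 0xad, 0xbe, 0xef, // desc: the (made-up) build ID bytes
	}
	return parseNotes(&noteBytesReader{raw}, 4, binary.LittleEndian)
}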

// GetBuildID returns the GNU build-ID for an ELF binary.
//
// If no build-ID was found but the binary was read without error, it returns
// (nil, nil).
func GetBuildID(binary io.ReaderAt) ([]byte, error) {
	f, err := elf.NewFile(binary)
	if err != nil {
		return nil, err
	}

	findBuildID := func(notes []elfNote) ([]byte, error) {
		var buildID []byte
		for _, note := range notes {
			if note.Name == "GNU" && note.Type == noteTypeGNUBuildID {
				if buildID == nil {
					buildID = note.Desc
				} else {
					return nil, fmt.Errorf("multiple build ids found, don't know which to use")
				}
			}
		}
		return buildID, nil
	}

	for _, p := range f.Progs {
		if p.Type != elf.PT_NOTE {
			continue
		}
		notes, err := parseNotes(p.Open(), int(p.Align), f.ByteOrder)
		if err != nil {
			return nil, err
		}
		if b, err := findBuildID(notes); b != nil || err != nil {
			return b, err
		}
	}
	for _, s := range f.Sections {
		if s.Type != elf.SHT_NOTE {
			continue
		}
		notes, err := parseNotes(s.Open(), int(s.Addralign), f.ByteOrder)
		if err != nil {
			return nil, err
		}
		if b, err := findBuildID(notes); b != nil || err != nil {
			return b, err
		}
	}
	return nil, nil
}
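
// exampleFormatBuildID is a hypothetical usage sketch, not part of the
// upstream package: it looks up the GNU build ID of an ELF image supplied as
// an io.ReaderAt (for example an *os.File opened by the caller) and formats
// it as the lower-case hex string that tools such as readelf print.
func exampleFormatBuildID(r io.ReaderAt) (string, error) {
	id, err := GetBuildID(r)
	if err != nil {
		return "", err
	}
	if id == nil {
		return "", fmt.Errorf("no GNU build-ID note found")
	}
	return fmt.Sprintf("%x", id), nil
}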

// kernelBase calculates the base for kernel mappings, which usually require
// special handling. For kernel mappings, tools (like perf) use the address of
// the kernel relocation symbol (_text or _stext) as the mmap start. Additionally,
// for obfuscation, ChromeOS profiles have the kernel image remapped to the 0-th page.
func kernelBase(loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, bool) {
	const (
		// PAGE_OFFSET for PowerPC64, see arch/powerpc/Kconfig in the kernel sources.
		pageOffsetPpc64 = 0xc000000000000000
		pageSize        = 4096
	)

	if loadSegment.Vaddr == start-offset {
		return offset, true
	}
	if start == 0 && limit != 0 && stextOffset != nil {
		// ChromeOS remaps its kernel to 0. Nothing else should come
		// down this path. Empirical values:
		//       VADDR=0xffffffff80200000
		// stextOffset=0xffffffff80200198
		return start - *stextOffset, true
	}
	if start >= loadSegment.Vaddr && limit > start && (offset == 0 || offset == pageOffsetPpc64 || offset == start) {
		// Some kernels look like:
		//       VADDR=0xffffffff80200000
		// stextOffset=0xffffffff80200198
		//       Start=0xffffffff83200000
		//       Limit=0xffffffff84200000
		//      Offset=0 (0xc000000000000000 for PowerPC64) (== Start for ASLR kernel)
		// So the base should be:
		if stextOffset != nil && (start%pageSize) == (*stextOffset%pageSize) {
			// perf uses the address of _stext as start. Some tools may
			// adjust for this before calling GetBase, in which case the page
			// alignment should be different from that of stextOffset.
			return start - *stextOffset, true
		}

		return start - loadSegment.Vaddr, true
	}
	if start%pageSize != 0 && stextOffset != nil && *stextOffset%pageSize == start%pageSize {
		// ChromeOS remaps its kernel to 0 + start%pageSize. Nothing
		// else should come down this path. Empirical values:
		//       start=0x198 limit=0x2f9fffff offset=0
		//       VADDR=0xffffffff81000000
		// stextOffset=0xffffffff81000198
		return start - *stextOffset, true
	}
	return 0, false
}
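
// exampleKernelBase is a hypothetical sketch, not part of the upstream
// package, that exercises the "some kernels look like" case documented above:
// perf reports a kernel text mapping starting at 0xffffffff83200000 with
// offset 0, the vmlinux text segment has Vaddr 0xffffffff80200000, and _stext
// lives at 0xffffffff80200198. Because the page alignment of the mapping
// start differs from that of _stext, kernelBase falls back to start-Vaddr and
// returns 0x3000000.
func exampleKernelBase() (uint64, bool) {
	stext := uint64(0xffffffff80200198)
	seg := &elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_X, Vaddr: 0xffffffff80200000}
	return kernelBase(seg, &stext, 0xffffffff83200000, 0xffffffff84200000, 0)
}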

// GetBase determines the base address to subtract from a virtual address to
// get the symbol table address. For an executable, the base is 0. Otherwise,
// it's a shared library, and the base is the address where the mapping starts.
// The kernel needs special handling.
func GetBase(fh *elf.FileHeader, loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, error) {

	if start == 0 && offset == 0 && (limit == ^uint64(0) || limit == 0) {
		// Some tools may introduce a fake mapping that spans the entire
		// address space. Assume that the address has already been
		// adjusted, so no additional base adjustment is necessary.
		return 0, nil
	}

	switch fh.Type {
	case elf.ET_EXEC:
		if loadSegment == nil {
			// Assume fixed-address executable and so no adjustment.
			return 0, nil
		}
		if stextOffset == nil && start > 0 && start < 0x8000000000000000 {
			// A regular user-mode executable. Compute the base offset using the same
			// arithmetic as in the ET_DYN case below, see the explanation there.
			// Ideally, the condition would just be "stextOffset == nil" as that
			// represents the address of the _stext symbol in the vmlinux image. Alas,
			// the caller may skip reading it from the binary (it's expensive to scan
			// all the symbols) and so it may be nil even for the kernel executable.
			// So additionally check that the start is within the user-mode half of
			// the 64-bit address space.
			return start - offset + loadSegment.Off - loadSegment.Vaddr, nil
		}
		// Various kernel heuristics and cases are handled separately.
		if base, match := kernelBase(loadSegment, stextOffset, start, limit, offset); match {
			return base, nil
		}
		// ChromeOS can remap its kernel to 0, and the caller might not have found
		// the _stext symbol. Split this case from kernelBase() above, since we don't
		// want to apply it to an ET_DYN user-mode executable.
		if start == 0 && limit != 0 && stextOffset == nil {
			return start - loadSegment.Vaddr, nil
		}

		return 0, fmt.Errorf("don't know how to handle EXEC segment: %v start=0x%x limit=0x%x offset=0x%x", *loadSegment, start, limit, offset)
	case elf.ET_REL:
		if offset != 0 {
			return 0, fmt.Errorf("don't know how to handle mapping.Offset")
		}
		return start, nil
	case elf.ET_DYN:
		// The process mapping information, start = start of virtual address range,
		// and offset = offset in the executable file of the start address, tells us
		// that a runtime virtual address x maps to a file offset
		// fx = x - start + offset.
		if loadSegment == nil {
			return start - offset, nil
		}
		// Kernels compiled as PIE can be ET_DYN as well. Use a heuristic similar to
		// the ET_EXEC case above.
		if base, match := kernelBase(loadSegment, stextOffset, start, limit, offset); match {
			return base, nil
		}
		// The program header, if not nil, indicates the offset in the file where
		// the executable segment is located (loadSegment.Off), and the base virtual
		// address where the first byte of the segment is loaded
		// (loadSegment.Vaddr). A file offset fx maps to a virtual (symbol) address
		// sx = fx - loadSegment.Off + loadSegment.Vaddr.
		//
		// Thus, a runtime virtual address x maps to a symbol address
		// sx = x - start + offset - loadSegment.Off + loadSegment.Vaddr.
		return start - offset + loadSegment.Off - loadSegment.Vaddr, nil
	}
	return 0, fmt.Errorf("don't know how to handle FileHeader.Type %v", fh.Type)
}
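
// exampleSharedLibraryBase is a hypothetical sketch, not part of the upstream
// package, that works the ET_DYN formula above with made-up but typical
// numbers: a library whose executable LOAD segment has Off=0x1000 and
// Vaddr=0x1000 is mapped at runtime with start=0x7f0123401000 and file offset
// 0x1000, so the base is
// start - offset + Off - Vaddr = 0x7f0123400000, the library's load address.
func exampleSharedLibraryBase() (uint64, error) {
	fh := &elf.FileHeader{Type: elf.ET_DYN}
	text := &elf.ProgHeader{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_X, Off: 0x1000, Vaddr: 0x1000}
	return GetBase(fh, text, nil, 0x7f0123401000, 0x7f0123600000, 0x1000)
}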

// FindTextProgHeader returns the program segment header containing the .text
// section, or nil if such a segment cannot be found.
func FindTextProgHeader(f *elf.File) *elf.ProgHeader {
	for _, s := range f.Sections {
		if s.Name == ".text" {
			// Find the LOAD segment containing the .text section.
			for _, p := range f.Progs {
				if p.Type == elf.PT_LOAD && p.Flags&elf.PF_X != 0 && s.Addr >= p.Vaddr && s.Addr < p.Vaddr+p.Memsz {
					return &p.ProgHeader
				}
			}
		}
	}
	return nil
}
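
// exampleTextSegmentBase is a hypothetical sketch, not part of the upstream
// package, showing how FindTextProgHeader and GetBase combine: given an
// already-open *elf.File and the runtime mapping of its text (start, limit
// and file offset as read from /proc/<pid>/maps), it computes the base to
// subtract from runtime addresses before symbol lookup.
func exampleTextSegmentBase(f *elf.File, mapStart, mapLimit, mapOff uint64) (uint64, error) {
	return GetBase(&f.FileHeader, FindTextProgHeader(f), nil, mapStart, mapLimit, mapOff)
}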

// ProgramHeadersForMapping returns the program segment headers that overlap
// the runtime mapping with file offset mapOff and memory size mapSz. We skip
// over segments with zero file size because their file offset values are
// unreliable. Even if overlapping, a segment is not selected if its aligned
// file offset is greater than the mapping file offset, or if the mapping
// includes the last page of the segment but not the full segment while also
// extending at least one more page past the segment end.
// The function returns a slice of pointers to the headers in the input
// slice, which are valid only while phdrs is not modified or discarded.
func ProgramHeadersForMapping(phdrs []elf.ProgHeader, mapOff, mapSz uint64) []*elf.ProgHeader {
	const (
		// pageSize defines the virtual memory page size used by the loader. This
		// value is dependent on the memory management unit of the CPU. The page
		// size is 4KB on virtually all the architectures that we care about, so we
		// define this metric as a constant. If we encounter architectures where
		// the page size is not 4KB, we must try to guess the page size on the
		// system where the profile was collected, possibly using the architecture
		// specified in the ELF file header.
		pageSize       = 4096
		pageOffsetMask = pageSize - 1
	)
	mapLimit := mapOff + mapSz
	var headers []*elf.ProgHeader
	for i := range phdrs {
		p := &phdrs[i]
		// Skip over segments with zero file size. Their file offsets can have
		// arbitrary values, see b/195427553.
		if p.Filesz == 0 {
			continue
		}
		segLimit := p.Off + p.Memsz
		// The segment must overlap the mapping.
		if p.Type == elf.PT_LOAD && mapOff < segLimit && p.Off < mapLimit {
			// If the mapping offset is strictly less than the page-aligned segment
			// offset, then this mapping comes from a different segment; fixes
			// b/179920361.
			alignedSegOffset := uint64(0)
			if p.Off > (p.Vaddr & pageOffsetMask) {
				alignedSegOffset = p.Off - (p.Vaddr & pageOffsetMask)
			}
			if mapOff < alignedSegOffset {
				continue
			}
			// If the mapping starts in the middle of the segment, it covers less than
			// one page of the segment, and it extends at least one page past the
			// segment, then this mapping comes from a different segment.
			if mapOff > p.Off && (segLimit < mapOff+pageSize) && (mapLimit >= segLimit+pageSize) {
				continue
			}
			headers = append(headers, p)
		}
	}
	return headers
}
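
// exampleHeadersForTextMapping is a hypothetical sketch, not part of the
// upstream package, with a made-up but typical two-segment layout
// (read/execute text followed by read/write data): a runtime mapping covering
// file offsets [0, 0x5000) selects only the text segment, because the data
// segment's file range does not overlap the mapping at all.
func exampleHeadersForTextMapping() []*elf.ProgHeader {
	phdrs := []elf.ProgHeader{
		{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_X, Off: 0, Vaddr: 0x400000, Filesz: 0x5000, Memsz: 0x5000},
		{Type: elf.PT_LOAD, Flags: elf.PF_R | elf.PF_W, Off: 0x6000, Vaddr: 0x606000, Filesz: 0x1000, Memsz: 0x2000},
	}
	return ProgramHeadersForMapping(phdrs, 0, 0x5000)
}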

// HeaderForFileOffset attempts to identify a unique program header that
// includes the given file offset. It returns an error if it cannot identify a
// unique header.
func HeaderForFileOffset(headers []*elf.ProgHeader, fileOffset uint64) (*elf.ProgHeader, error) {
	var ph *elf.ProgHeader
	for _, h := range headers {
		if fileOffset >= h.Off && fileOffset < h.Off+h.Memsz {
			if ph != nil {
				// Assuming no other bugs, this can only happen if we have two or
				// more small program segments that fit on the same page, and a
				// segment other than the last one includes uninitialized data, or
				// if the debug binary used for symbolization is stripped of some
				// sections, so segment file sizes are smaller than memory sizes.
				return nil, fmt.Errorf("found second program header (%#v) that matches file offset %x, first program header is %#v. Is this a stripped binary, or does the first program segment contain uninitialized data?", *h, fileOffset, *ph)
			}
			ph = h
		}
	}
	if ph == nil {
		return nil, fmt.Errorf("no program header matches file offset %x", fileOffset)
	}
	return ph, nil
}
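
// exampleHeaderForOffset is a hypothetical sketch, not part of the upstream
// package: it resolves the file offset of a sampled instruction (0x1234, made
// up) against the headers selected by exampleHeadersForTextMapping above,
// yielding the unique header a symbolizer would then use for base computation.
func exampleHeaderForOffset() (*elf.ProgHeader, error) {
	return HeaderForFileOffset(exampleHeadersForTextMapping(), 0x1234)
}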