Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/vendor/github.com/google/pprof/internal/binutils/binutils.go

Documentation: cmd/vendor/github.com/google/pprof/internal/binutils

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package binutils provides access to the GNU binutils.
    16  package binutils
    17  
    18  import (
    19  	"debug/elf"
    20  	"debug/macho"
    21  	"encoding/binary"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"os/exec"
    26  	"path/filepath"
    27  	"regexp"
    28  	"runtime"
    29  	"strings"
    30  	"sync"
    31  
    32  	"github.com/google/pprof/internal/elfexec"
    33  	"github.com/google/pprof/internal/plugin"
    34  )
    35  
// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
//
// The zero value is ready to use: the tool configuration is built on the
// first call to get or update.
type Binutils struct {
	mu  sync.Mutex // guards rep
	rep *binrep    // current configuration; nil until first initialized
}
    41  
// binrep is an immutable representation for Binutils.  It is atomically
// replaced on every mutation to provide thread-safe access.
type binrep struct {
	// Commands to invoke. Each xxxFound flag records whether the lookup for
	// the corresponding tool succeeded; when it is false, the command field
	// holds the bare name that was looked up instead of a resolved path.
	llvmSymbolizer      string
	llvmSymbolizerFound bool
	addr2line           string
	addr2lineFound      bool
	nm                  string
	nmFound             bool
	objdump             string
	objdumpFound        bool

	// if fast, perform symbolization using nm (symbol names only),
	// instead of file-line detail from the slower addr2line.
	fast bool
}
    59  
    60  // get returns the current representation for bu, initializing it if necessary.
    61  func (bu *Binutils) get() *binrep {
    62  	bu.mu.Lock()
    63  	r := bu.rep
    64  	if r == nil {
    65  		r = &binrep{}
    66  		initTools(r, "")
    67  		bu.rep = r
    68  	}
    69  	bu.mu.Unlock()
    70  	return r
    71  }
    72  
    73  // update modifies the rep for bu via the supplied function.
    74  func (bu *Binutils) update(fn func(r *binrep)) {
    75  	r := &binrep{}
    76  	bu.mu.Lock()
    77  	defer bu.mu.Unlock()
    78  	if bu.rep == nil {
    79  		initTools(r, "")
    80  	} else {
    81  		*r = *bu.rep
    82  	}
    83  	fn(r)
    84  	bu.rep = r
    85  }
    86  
    87  // String returns string representation of the binutils state for debug logging.
    88  func (bu *Binutils) String() string {
    89  	r := bu.get()
    90  	var llvmSymbolizer, addr2line, nm, objdump string
    91  	if r.llvmSymbolizerFound {
    92  		llvmSymbolizer = r.llvmSymbolizer
    93  	}
    94  	if r.addr2lineFound {
    95  		addr2line = r.addr2line
    96  	}
    97  	if r.nmFound {
    98  		nm = r.nm
    99  	}
   100  	if r.objdumpFound {
   101  		objdump = r.objdump
   102  	}
   103  	return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
   104  		llvmSymbolizer, addr2line, nm, objdump, r.fast)
   105  }
   106  
   107  // SetFastSymbolization sets a toggle that makes binutils use fast
   108  // symbolization (using nm), which is much faster than addr2line but
   109  // provides only symbol name information (no file/line).
   110  func (bu *Binutils) SetFastSymbolization(fast bool) {
   111  	bu.update(func(r *binrep) { r.fast = fast })
   112  }
   113  
   114  // SetTools processes the contents of the tools option. It
   115  // expects a set of entries separated by commas; each entry is a pair
   116  // of the form t:path, where cmd will be used to look only for the
   117  // tool named t. If t is not specified, the path is searched for all
   118  // tools.
   119  func (bu *Binutils) SetTools(config string) {
   120  	bu.update(func(r *binrep) { initTools(r, config) })
   121  }
   122  
   123  func initTools(b *binrep, config string) {
   124  	// paths collect paths per tool; Key "" contains the default.
   125  	paths := make(map[string][]string)
   126  	for _, t := range strings.Split(config, ",") {
   127  		name, path := "", t
   128  		if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
   129  			name, path = ct[0], ct[1]
   130  		}
   131  		paths[name] = append(paths[name], path)
   132  	}
   133  
   134  	defaultPath := paths[""]
   135  	b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
   136  	b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
   137  	if !b.addr2lineFound {
   138  		// On MacOS, brew installs addr2line under gaddr2line name, so search for
   139  		// that if the tool is not found by its default name.
   140  		b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
   141  	}
   142  	b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
   143  	b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
   144  }
   145  
   146  // findExe looks for an executable command on a set of paths.
   147  // If it cannot find it, returns cmd.
   148  func findExe(cmd string, paths []string) (string, bool) {
   149  	for _, p := range paths {
   150  		cp := filepath.Join(p, cmd)
   151  		if c, err := exec.LookPath(cp); err == nil {
   152  			return c, true
   153  		}
   154  	}
   155  	return cmd, false
   156  }
   157  
   158  // Disasm returns the assembly instructions for the specified address range
   159  // of a binary.
   160  func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
   161  	b := bu.get()
   162  	cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
   163  		fmt.Sprintf("--start-address=%#x", start),
   164  		fmt.Sprintf("--stop-address=%#x", end),
   165  		file)
   166  	out, err := cmd.Output()
   167  	if err != nil {
   168  		return nil, fmt.Errorf("%v: %v", cmd.Args, err)
   169  	}
   170  
   171  	return disassemble(out)
   172  }
   173  
   174  // Open satisfies the plugin.ObjTool interface.
   175  func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   176  	b := bu.get()
   177  
   178  	// Make sure file is a supported executable.
   179  	// This uses magic numbers, mainly to provide better error messages but
   180  	// it should also help speed.
   181  
   182  	if _, err := os.Stat(name); err != nil {
   183  		// For testing, do not require file name to exist.
   184  		if strings.Contains(b.addr2line, "testdata/") {
   185  			return &fileAddr2Line{file: file{b: b, name: name}}, nil
   186  		}
   187  		return nil, err
   188  	}
   189  
   190  	// Read the first 4 bytes of the file.
   191  
   192  	f, err := os.Open(name)
   193  	if err != nil {
   194  		return nil, fmt.Errorf("error opening %s: %v", name, err)
   195  	}
   196  	defer f.Close()
   197  
   198  	var header [4]byte
   199  	if _, err = io.ReadFull(f, header[:]); err != nil {
   200  		return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
   201  	}
   202  
   203  	elfMagic := string(header[:])
   204  
   205  	// Match against supported file types.
   206  	if elfMagic == elf.ELFMAG {
   207  		f, err := b.openELF(name, start, limit, offset)
   208  		if err != nil {
   209  			return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
   210  		}
   211  		return f, nil
   212  	}
   213  
   214  	// Mach-O magic numbers can be big or little endian.
   215  	machoMagicLittle := binary.LittleEndian.Uint32(header[:])
   216  	machoMagicBig := binary.BigEndian.Uint32(header[:])
   217  
   218  	if machoMagicLittle == macho.Magic32 || machoMagicLittle == macho.Magic64 ||
   219  		machoMagicBig == macho.Magic32 || machoMagicBig == macho.Magic64 {
   220  		f, err := b.openMachO(name, start, limit, offset)
   221  		if err != nil {
   222  			return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
   223  		}
   224  		return f, nil
   225  	}
   226  	if machoMagicLittle == macho.MagicFat || machoMagicBig == macho.MagicFat {
   227  		f, err := b.openFatMachO(name, start, limit, offset)
   228  		if err != nil {
   229  			return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
   230  		}
   231  		return f, nil
   232  	}
   233  
   234  	return nil, fmt.Errorf("unrecognized binary format: %s", name)
   235  }
   236  
   237  func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
   238  
   239  	// Subtract the load address of the __TEXT section. Usually 0 for shared
   240  	// libraries or 0x100000000 for executables. You can check this value by
   241  	// running `objdump -private-headers <file>`.
   242  
   243  	textSegment := of.Segment("__TEXT")
   244  	if textSegment == nil {
   245  		return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
   246  	}
   247  	if textSegment.Addr > start {
   248  		return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
   249  			name, textSegment.Addr, start)
   250  	}
   251  
   252  	base := start - textSegment.Addr
   253  
   254  	if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
   255  		return &fileNM{file: file{b: b, name: name, base: base}}, nil
   256  	}
   257  	return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
   258  }
   259  
   260  func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   261  	of, err := macho.OpenFat(name)
   262  	if err != nil {
   263  		return nil, fmt.Errorf("error parsing %s: %v", name, err)
   264  	}
   265  	defer of.Close()
   266  
   267  	if len(of.Arches) == 0 {
   268  		return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
   269  	}
   270  
   271  	var arch macho.Cpu
   272  	// Use the host architecture.
   273  	// TODO: This is not ideal because the host architecture may not be the one
   274  	// that was profiled. E.g. an amd64 host can profile a 386 program.
   275  	switch runtime.GOARCH {
   276  	case "386":
   277  		arch = macho.Cpu386
   278  	case "amd64", "amd64p32":
   279  		arch = macho.CpuAmd64
   280  	case "arm", "armbe", "arm64", "arm64be":
   281  		arch = macho.CpuArm
   282  	case "ppc":
   283  		arch = macho.CpuPpc
   284  	case "ppc64", "ppc64le":
   285  		arch = macho.CpuPpc64
   286  	default:
   287  		return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
   288  	}
   289  	for i := range of.Arches {
   290  		if of.Arches[i].Cpu == arch {
   291  			return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
   292  		}
   293  	}
   294  	return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
   295  }
   296  
   297  func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   298  	of, err := macho.Open(name)
   299  	if err != nil {
   300  		return nil, fmt.Errorf("error parsing %s: %v", name, err)
   301  	}
   302  	defer of.Close()
   303  
   304  	return b.openMachOCommon(name, of, start, limit, offset)
   305  }
   306  
   307  func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
   308  	ef, err := elf.Open(name)
   309  	if err != nil {
   310  		return nil, fmt.Errorf("error parsing %s: %v", name, err)
   311  	}
   312  	defer ef.Close()
   313  
   314  	var stextOffset *uint64
   315  	var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
   316  	if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
   317  		// Reading all Symbols is expensive, and we only rarely need it so
   318  		// we don't want to do it every time. But if _stext happens to be
   319  		// page-aligned but isn't the same as Vaddr, we would symbolize
   320  		// wrong. So if the name the addresses aren't page aligned, or if
   321  		// the name is "vmlinux" we read _stext. We can be wrong if: (1)
   322  		// someone passes a kernel path that doesn't contain "vmlinux" AND
   323  		// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
   324  		symbols, err := ef.Symbols()
   325  		if err != nil && err != elf.ErrNoSymbols {
   326  			return nil, err
   327  		}
   328  		for _, s := range symbols {
   329  			if s.Name == "_stext" {
   330  				// The kernel may use _stext as the mapping start address.
   331  				stextOffset = &s.Value
   332  				break
   333  			}
   334  		}
   335  	}
   336  
   337  	base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
   338  	if err != nil {
   339  		return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
   340  	}
   341  
   342  	buildID := ""
   343  	if f, err := os.Open(name); err == nil {
   344  		if id, err := elfexec.GetBuildID(f); err == nil {
   345  			buildID = fmt.Sprintf("%x", id)
   346  		}
   347  	}
   348  	if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
   349  		return &fileNM{file: file{b, name, base, buildID}}, nil
   350  	}
   351  	return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
   352  }
   353  
// file holds the state and the default method implementations shared by
// fileNM and fileAddr2Line, which embed it.
//
// The field order matters: openELF builds values with positional literals.
type file struct {
	b       *binrep // tool configuration captured when the file was opened
	name    string  // file name the object was opened with
	base    uint64  // base computed when the file was opened
	buildID string  // hex build ID; filled in only by openELF, when readable
}
   361  
// Name returns the file name the object was opened with.
func (f *file) Name() string {
	return f.name
}
   365  
// Base returns the base value computed when the file was opened.
func (f *file) Base() uint64 {
	return f.base
}
   369  
// BuildID returns the build ID recorded for the file, or the empty string
// if none was recorded.
func (f *file) BuildID() string {
	return f.buildID
}
   373  
// SourceLine on the base file type performs no symbolization: it always
// returns an empty, non-nil frame slice and a nil error. fileNM and
// fileAddr2Line define their own SourceLine methods.
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
	return []plugin.Frame{}, nil
}
   377  
// Close is a no-op for the base file type and always returns nil.
func (f *file) Close() error {
	return nil
}
   381  
   382  func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
   383  	// Get from nm a list of symbols sorted by address.
   384  	cmd := exec.Command(f.b.nm, "-n", f.name)
   385  	out, err := cmd.Output()
   386  	if err != nil {
   387  		return nil, fmt.Errorf("%v: %v", cmd.Args, err)
   388  	}
   389  
   390  	return findSymbols(out, f.name, r, addr)
   391  }
   392  
// fileNM implements the plugin.ObjFile interface, using 'nm' to map
// addresses to symbols (without file/line number information). It is
// faster than fileAddr2Line.
//
// addr2linernm is created lazily by the first SourceLine call.
type fileNM struct {
	file
	addr2linernm *addr2LinerNM
}
   400  
   401  func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
   402  	if f.addr2linernm == nil {
   403  		addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
   404  		if err != nil {
   405  			return nil, err
   406  		}
   407  		f.addr2linernm = addr2liner
   408  	}
   409  	return f.addr2linernm.addrInfo(addr)
   410  }
   411  
// fileAddr2Line implements the plugin.ObjFile interface, using
// llvm-symbolizer, if that's available, or addr2line to map addresses to
// symbols (with file/line number information). It can be slow for large
// binaries with debug information.
//
// once ensures init runs a single time, on the first SourceLine call. init
// sets at most one of addr2liner and llvmSymbolizer; Close resets both to
// nil.
type fileAddr2Line struct {
	once sync.Once
	file
	addr2liner     *addr2Liner
	llvmSymbolizer *llvmSymbolizer
}
   422  
   423  func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
   424  	f.once.Do(f.init)
   425  	if f.llvmSymbolizer != nil {
   426  		return f.llvmSymbolizer.addrInfo(addr)
   427  	}
   428  	if f.addr2liner != nil {
   429  		return f.addr2liner.addrInfo(addr)
   430  	}
   431  	return nil, fmt.Errorf("could not find local addr2liner")
   432  }
   433  
   434  func (f *fileAddr2Line) init() {
   435  	if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
   436  		f.llvmSymbolizer = llvmSymbolizer
   437  		return
   438  	}
   439  
   440  	if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
   441  		f.addr2liner = addr2liner
   442  
   443  		// When addr2line encounters some gcc compiled binaries, it
   444  		// drops interesting parts of names in anonymous namespaces.
   445  		// Fallback to NM for better function names.
   446  		if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
   447  			f.addr2liner.nm = nm
   448  		}
   449  	}
   450  }
   451  
   452  func (f *fileAddr2Line) Close() error {
   453  	if f.llvmSymbolizer != nil {
   454  		f.llvmSymbolizer.rw.close()
   455  		f.llvmSymbolizer = nil
   456  	}
   457  	if f.addr2liner != nil {
   458  		f.addr2liner.rw.close()
   459  		f.addr2liner = nil
   460  	}
   461  	return nil
   462  }
   463  

View as plain text