Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/link/internal/ld/deadcode.go

Documentation: cmd/link/internal/ld

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ld
     6  
     7  import (
     8  	"cmd/internal/goobj"
     9  	"cmd/internal/objabi"
    10  	"cmd/internal/sys"
    11  	"cmd/link/internal/loader"
    12  	"cmd/link/internal/sym"
    13  	"fmt"
    14  	"unicode"
    15  )
    16  
// Reference fmt unconditionally.
// NOTE(review): presumably this kept the import alive while debugging
// prints were added and removed; fmt is also used elsewhere in this file,
// so the line looks redundant — confirm before deleting.
var _ = fmt.Print
    18  
    19  type deadcodePass struct {
    20  	ctxt *Link
    21  	ldr  *loader.Loader
    22  	wq   heap // work queue, using min-heap for beter locality
    23  
    24  	ifaceMethod     map[methodsig]bool // methods declared in reached interfaces
    25  	markableMethods []methodref        // methods of reached types
    26  	reflectSeen     bool               // whether we have seen a reflect method call
    27  	dynlink         bool
    28  
    29  	methodsigstmp []methodsig // scratch buffer for decoding method signatures
    30  }
    31  
    32  func (d *deadcodePass) init() {
    33  	d.ldr.InitReachable()
    34  	d.ifaceMethod = make(map[methodsig]bool)
    35  	if objabi.Fieldtrack_enabled != 0 {
    36  		d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
    37  	}
    38  	d.dynlink = d.ctxt.DynlinkingGo()
    39  
    40  	if d.ctxt.BuildMode == BuildModeShared {
    41  		// Mark all symbols defined in this library as reachable when
    42  		// building a shared library.
    43  		n := d.ldr.NDef()
    44  		for i := 1; i < n; i++ {
    45  			s := loader.Sym(i)
    46  			d.mark(s, 0)
    47  		}
    48  		return
    49  	}
    50  
    51  	var names []string
    52  
    53  	// In a normal binary, start at main.main and the init
    54  	// functions and mark what is reachable from there.
    55  	if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
    56  		names = append(names, "main.main", "main..inittask")
    57  	} else {
    58  		// The external linker refers main symbol directly.
    59  		if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
    60  			if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
    61  				*flagEntrySymbol = "_main"
    62  			} else {
    63  				*flagEntrySymbol = "main"
    64  			}
    65  		}
    66  		names = append(names, *flagEntrySymbol)
    67  		if !d.ctxt.linkShared && d.ctxt.BuildMode != BuildModePlugin {
    68  			// runtime.buildVersion and runtime.modinfo are referenced in .go.buildinfo section
    69  			// (see function buildinfo in data.go). They should normally be reachable from the
    70  			// runtime. Just make it explicit, in case.
    71  			names = append(names, "runtime.buildVersion", "runtime.modinfo")
    72  		}
    73  		if d.ctxt.BuildMode == BuildModePlugin {
    74  			names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go.plugin.tabs")
    75  
    76  			// We don't keep the go.plugin.exports symbol,
    77  			// but we do keep the symbols it refers to.
    78  			exportsIdx := d.ldr.Lookup("go.plugin.exports", 0)
    79  			if exportsIdx != 0 {
    80  				relocs := d.ldr.Relocs(exportsIdx)
    81  				for i := 0; i < relocs.Count(); i++ {
    82  					d.mark(relocs.At(i).Sym(), 0)
    83  				}
    84  			}
    85  		}
    86  	}
    87  
    88  	dynexpMap := d.ctxt.cgo_export_dynamic
    89  	if d.ctxt.LinkMode == LinkExternal {
    90  		dynexpMap = d.ctxt.cgo_export_static
    91  	}
    92  	for exp := range dynexpMap {
    93  		names = append(names, exp)
    94  	}
    95  
    96  	for _, name := range names {
    97  		// Mark symbol as a data/ABI0 symbol.
    98  		d.mark(d.ldr.Lookup(name, 0), 0)
    99  		// Also mark any Go functions (internal ABI).
   100  		d.mark(d.ldr.Lookup(name, sym.SymVerABIInternal), 0)
   101  	}
   102  }
   103  
   104  func (d *deadcodePass) flood() {
   105  	var methods []methodref
   106  	for !d.wq.empty() {
   107  		symIdx := d.wq.pop()
   108  
   109  		d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)
   110  
   111  		isgotype := d.ldr.IsGoType(symIdx)
   112  		relocs := d.ldr.Relocs(symIdx)
   113  		var usedInIface bool
   114  
   115  		if isgotype {
   116  			if d.dynlink {
   117  				// When dynaamic linking, a type may be passed across DSO
   118  				// boundary and get converted to interface at the other side.
   119  				d.ldr.SetAttrUsedInIface(symIdx, true)
   120  			}
   121  			usedInIface = d.ldr.AttrUsedInIface(symIdx)
   122  		}
   123  
   124  		methods = methods[:0]
   125  		for i := 0; i < relocs.Count(); i++ {
   126  			r := relocs.At(i)
   127  			t := r.Type()
   128  			switch t {
   129  			case objabi.R_WEAKADDROFF:
   130  				continue
   131  			case objabi.R_METHODOFF:
   132  				if i+2 >= relocs.Count() {
   133  					panic("expect three consecutive R_METHODOFF relocs")
   134  				}
   135  				if usedInIface {
   136  					methods = append(methods, methodref{src: symIdx, r: i})
   137  					// The method descriptor is itself a type descriptor, and
   138  					// it can be used to reach other types, e.g. by using
   139  					// reflect.Type.Method(i).Type.In(j). We need to traverse
   140  					// its child types with UsedInIface set. (See also the
   141  					// comment below.)
   142  					rs := r.Sym()
   143  					if !d.ldr.AttrUsedInIface(rs) {
   144  						d.ldr.SetAttrUsedInIface(rs, true)
   145  						if d.ldr.AttrReachable(rs) {
   146  							d.ldr.SetAttrReachable(rs, false)
   147  							d.mark(rs, symIdx)
   148  						}
   149  					}
   150  				}
   151  				i += 2
   152  				continue
   153  			case objabi.R_USETYPE:
   154  				// type symbol used for DWARF. we need to load the symbol but it may not
   155  				// be otherwise reachable in the program.
   156  				// do nothing for now as we still load all type symbols.
   157  				continue
   158  			case objabi.R_USEIFACE:
   159  				// R_USEIFACE is a marker relocation that tells the linker the type is
   160  				// converted to an interface, i.e. should have UsedInIface set. See the
   161  				// comment below for why we need to unset the Reachable bit and re-mark it.
   162  				rs := r.Sym()
   163  				if !d.ldr.AttrUsedInIface(rs) {
   164  					d.ldr.SetAttrUsedInIface(rs, true)
   165  					if d.ldr.AttrReachable(rs) {
   166  						d.ldr.SetAttrReachable(rs, false)
   167  						d.mark(rs, symIdx)
   168  					}
   169  				}
   170  				continue
   171  			case objabi.R_USEIFACEMETHOD:
   172  				// R_USEIFACEMETHOD is a marker relocation that marks an interface
   173  				// method as used.
   174  				rs := r.Sym()
   175  				if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
   176  					// Don't decode symbol from shared library (we'll mark all exported methods anyway).
   177  					// We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
   178  					// been resolved at this point.
   179  					continue
   180  				}
   181  				m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
   182  				if d.ctxt.Debugvlog > 1 {
   183  					d.ctxt.Logf("reached iface method: %v\n", m)
   184  				}
   185  				d.ifaceMethod[m] = true
   186  				continue
   187  			}
   188  			rs := r.Sym()
   189  			if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
   190  				// If a type is converted to an interface, it is possible to obtain an
   191  				// interface with a "child" type of it using reflection (e.g. obtain an
   192  				// interface of T from []chan T). We need to traverse its "child" types
   193  				// with UsedInIface attribute set.
   194  				// When visiting the child type (chan T in the example above), it will
   195  				// have UsedInIface set, so it in turn will mark and (re)visit its children
   196  				// (e.g. T above).
   197  				// We unset the reachable bit here, so if the child type is already visited,
   198  				// it will be visited again.
   199  				// Note that a type symbol can be visited at most twice, one without
   200  				// UsedInIface and one with. So termination is still guaranteed.
   201  				d.ldr.SetAttrUsedInIface(rs, true)
   202  				d.ldr.SetAttrReachable(rs, false)
   203  			}
   204  			d.mark(rs, symIdx)
   205  		}
   206  		naux := d.ldr.NAux(symIdx)
   207  		for i := 0; i < naux; i++ {
   208  			a := d.ldr.Aux(symIdx, i)
   209  			if a.Type() == goobj.AuxGotype {
   210  				// A symbol being reachable doesn't imply we need its
   211  				// type descriptor. Don't mark it.
   212  				continue
   213  			}
   214  			d.mark(a.Sym(), symIdx)
   215  		}
   216  		// Some host object symbols have an outer object, which acts like a
   217  		// "carrier" symbol, or it holds all the symbols for a particular
   218  		// section. We need to mark all "referenced" symbols from that carrier,
   219  		// so we make sure we're pulling in all outer symbols, and their sub
   220  		// symbols. This is not ideal, and these carrier/section symbols could
   221  		// be removed.
   222  		if d.ldr.IsExternal(symIdx) {
   223  			d.mark(d.ldr.OuterSym(symIdx), symIdx)
   224  			d.mark(d.ldr.SubSym(symIdx), symIdx)
   225  		}
   226  
   227  		if len(methods) != 0 {
   228  			if !isgotype {
   229  				panic("method found on non-type symbol")
   230  			}
   231  			// Decode runtime type information for type methods
   232  			// to help work out which methods can be called
   233  			// dynamically via interfaces.
   234  			methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
   235  			if len(methods) != len(methodsigs) {
   236  				panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
   237  			}
   238  			for i, m := range methodsigs {
   239  				methods[i].m = m
   240  				if d.ctxt.Debugvlog > 1 {
   241  					d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
   242  				}
   243  			}
   244  			d.markableMethods = append(d.markableMethods, methods...)
   245  		}
   246  	}
   247  }
   248  
   249  func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
   250  	if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
   251  		d.wq.push(symIdx)
   252  		d.ldr.SetAttrReachable(symIdx, true)
   253  		if objabi.Fieldtrack_enabled != 0 && d.ldr.Reachparent[symIdx] == 0 {
   254  			d.ldr.Reachparent[symIdx] = parent
   255  		}
   256  		if *flagDumpDep {
   257  			to := d.ldr.SymName(symIdx)
   258  			if to != "" {
   259  				if d.ldr.AttrUsedInIface(symIdx) {
   260  					to += " <UsedInIface>"
   261  				}
   262  				from := "_"
   263  				if parent != 0 {
   264  					from = d.ldr.SymName(parent)
   265  					if d.ldr.AttrUsedInIface(parent) {
   266  						from += " <UsedInIface>"
   267  					}
   268  				}
   269  				fmt.Printf("%s -> %s\n", from, to)
   270  			}
   271  		}
   272  	}
   273  }
   274  
   275  func (d *deadcodePass) markMethod(m methodref) {
   276  	relocs := d.ldr.Relocs(m.src)
   277  	d.mark(relocs.At(m.r).Sym(), m.src)
   278  	d.mark(relocs.At(m.r+1).Sym(), m.src)
   279  	d.mark(relocs.At(m.r+2).Sym(), m.src)
   280  }
   281  
   282  // deadcode marks all reachable symbols.
   283  //
   284  // The basis of the dead code elimination is a flood fill of symbols,
   285  // following their relocations, beginning at *flagEntrySymbol.
   286  //
   287  // This flood fill is wrapped in logic for pruning unused methods.
   288  // All methods are mentioned by relocations on their receiver's *rtype.
   289  // These relocations are specially defined as R_METHODOFF by the compiler
   290  // so we can detect and manipulated them here.
   291  //
   292  // There are three ways a method of a reachable type can be invoked:
   293  //
   294  //	1. direct call
   295  //	2. through a reachable interface type
   296  //	3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
   297  //	   (or MethodByName)
   298  //
   299  // The first case is handled by the flood fill, a directly called method
   300  // is marked as reachable.
   301  //
   302  // The second case is handled by decomposing all reachable interface
   303  // types into method signatures. Each encountered method is compared
   304  // against the interface method signatures, if it matches it is marked
   305  // as reachable. This is extremely conservative, but easy and correct.
   306  //
   307  // The third case is handled by looking to see if any of:
   308  //	- reflect.Value.Method or MethodByName is reachable
   309  // 	- reflect.Type.Method or MethodByName is called (through the
   310  // 	  REFLECTMETHOD attribute marked by the compiler).
   311  // If any of these happen, all bets are off and all exported methods
   312  // of reachable types are marked reachable.
   313  //
   314  // Any unreached text symbols are removed from ctxt.Textp.
   315  func deadcode(ctxt *Link) {
   316  	ldr := ctxt.loader
   317  	d := deadcodePass{ctxt: ctxt, ldr: ldr}
   318  	d.init()
   319  	d.flood()
   320  
   321  	methSym := ldr.Lookup("reflect.Value.Method", sym.SymVerABIInternal)
   322  	methByNameSym := ldr.Lookup("reflect.Value.MethodByName", sym.SymVerABIInternal)
   323  
   324  	if ctxt.DynlinkingGo() {
   325  		// Exported methods may satisfy interfaces we don't know
   326  		// about yet when dynamically linking.
   327  		d.reflectSeen = true
   328  	}
   329  
   330  	for {
   331  		// Methods might be called via reflection. Give up on
   332  		// static analysis, mark all exported methods of
   333  		// all reachable types as reachable.
   334  		d.reflectSeen = d.reflectSeen || (methSym != 0 && ldr.AttrReachable(methSym)) || (methByNameSym != 0 && ldr.AttrReachable(methByNameSym))
   335  
   336  		// Mark all methods that could satisfy a discovered
   337  		// interface as reachable. We recheck old marked interfaces
   338  		// as new types (with new methods) may have been discovered
   339  		// in the last pass.
   340  		rem := d.markableMethods[:0]
   341  		for _, m := range d.markableMethods {
   342  			if (d.reflectSeen && m.isExported()) || d.ifaceMethod[m.m] {
   343  				d.markMethod(m)
   344  			} else {
   345  				rem = append(rem, m)
   346  			}
   347  		}
   348  		d.markableMethods = rem
   349  
   350  		if d.wq.empty() {
   351  			// No new work was discovered. Done.
   352  			break
   353  		}
   354  		d.flood()
   355  	}
   356  }
   357  
   358  // methodsig is a typed method signature (name + type).
   359  type methodsig struct {
   360  	name string
   361  	typ  loader.Sym // type descriptor symbol of the function
   362  }
   363  
   364  // methodref holds the relocations from a receiver type symbol to its
   365  // method. There are three relocations, one for each of the fields in
   366  // the reflect.method struct: mtyp, ifn, and tfn.
   367  type methodref struct {
   368  	m   methodsig
   369  	src loader.Sym // receiver type symbol
   370  	r   int        // the index of R_METHODOFF relocations
   371  }
   372  
   373  func (m methodref) isExported() bool {
   374  	for _, r := range m.m.name {
   375  		return unicode.IsUpper(r)
   376  	}
   377  	panic("methodref has no signature")
   378  }
   379  
   380  // decodeMethodSig decodes an array of method signature information.
   381  // Each element of the array is size bytes. The first 4 bytes is a
   382  // nameOff for the method name, and the next 4 bytes is a typeOff for
   383  // the function type.
   384  //
   385  // Conveniently this is the layout of both runtime.method and runtime.imethod.
   386  func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig {
   387  	if cap(d.methodsigstmp) < count {
   388  		d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...)
   389  	}
   390  	var methods = d.methodsigstmp[:count]
   391  	for i := 0; i < count; i++ {
   392  		methods[i].name = decodetypeName(ldr, symIdx, relocs, off)
   393  		methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4))
   394  		off += size
   395  	}
   396  	return methods
   397  }
   398  
   399  // Decode the method of interface type symbol symIdx at offset off.
   400  func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig {
   401  	p := ldr.Data(symIdx)
   402  	if decodetypeKind(arch, p)&kindMask != kindInterface {
   403  		panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx)))
   404  	}
   405  	relocs := ldr.Relocs(symIdx)
   406  	var m methodsig
   407  	m.name = decodetypeName(ldr, symIdx, &relocs, int(off))
   408  	m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4))
   409  	return m
   410  }
   411  
   412  func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig {
   413  	p := ldr.Data(symIdx)
   414  	if !decodetypeHasUncommon(arch, p) {
   415  		panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx)))
   416  	}
   417  	off := commonsize(arch) // reflect.rtype
   418  	switch decodetypeKind(arch, p) & kindMask {
   419  	case kindStruct: // reflect.structType
   420  		off += 4 * arch.PtrSize
   421  	case kindPtr: // reflect.ptrType
   422  		off += arch.PtrSize
   423  	case kindFunc: // reflect.funcType
   424  		off += arch.PtrSize // 4 bytes, pointer aligned
   425  	case kindSlice: // reflect.sliceType
   426  		off += arch.PtrSize
   427  	case kindArray: // reflect.arrayType
   428  		off += 3 * arch.PtrSize
   429  	case kindChan: // reflect.chanType
   430  		off += 2 * arch.PtrSize
   431  	case kindMap: // reflect.mapType
   432  		off += 4*arch.PtrSize + 8
   433  	case kindInterface: // reflect.interfaceType
   434  		off += 3 * arch.PtrSize
   435  	default:
   436  		// just Sizeof(rtype)
   437  	}
   438  
   439  	mcount := int(decodeInuxi(arch, p[off+4:], 2))
   440  	moff := int(decodeInuxi(arch, p[off+4+2+2:], 4))
   441  	off += moff                // offset to array of reflect.method values
   442  	const sizeofMethod = 4 * 4 // sizeof reflect.method in program
   443  	return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount)
   444  }
   445  

View as plain text