Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/objabi"
    11  	"cmd/internal/src"
    12  	"encoding/binary"
    13  	"fmt"
    14  	"io"
    15  	"math"
    16  	"math/bits"
    17  	"os"
    18  	"path/filepath"
    19  )
    20  
    21  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
    22  	// repeat rewrites until we find no more rewrites
    23  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    24  	pendingLines.clear()
    25  	for {
    26  		change := false
    27  		for _, b := range f.Blocks {
    28  			if b.Control != nil && b.Control.Op == OpCopy {
    29  				for b.Control.Op == OpCopy {
    30  					b.SetControl(b.Control.Args[0])
    31  				}
    32  			}
    33  			if rb(b) {
    34  				change = true
    35  			}
    36  			for j, v := range b.Values {
    37  				change = phielimValue(v) || change
    38  
    39  				// Eliminate copy inputs.
    40  				// If any copy input becomes unused, mark it
    41  				// as invalid and discard its argument. Repeat
    42  				// recursively on the discarded argument.
    43  				// This phase helps remove phantom "dead copy" uses
     44  				// of a value so that an x.Uses==1 rule condition
    45  				// fires reliably.
    46  				for i, a := range v.Args {
    47  					if a.Op != OpCopy {
    48  						continue
    49  					}
    50  					aa := copySource(a)
    51  					v.SetArg(i, aa)
    52  					// If a, a copy, has a line boundary indicator, attempt to find a new value
    53  					// to hold it.  The first candidate is the value that will replace a (aa),
    54  					// if it shares the same block and line and is eligible.
     55  					// The second option is v, which uses a as an input.  Because aa is earlier in
    56  					// the data flow, it is the better choice.
    57  					if a.Pos.IsStmt() == src.PosIsStmt {
    58  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
    59  							aa.Pos = aa.Pos.WithIsStmt()
    60  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
    61  							v.Pos = v.Pos.WithIsStmt()
    62  						} else {
    63  							// Record the lost line and look for a new home after all rewrites are complete.
    64  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
    65  							// line to appear in more than one block, but only one block is stored, so if both end
    66  							// up here, then one will be lost.
    67  							pendingLines.set(a.Pos, int32(a.Block.ID))
    68  						}
    69  						a.Pos = a.Pos.WithNotStmt()
    70  					}
    71  					change = true
    72  					for a.Uses == 0 {
    73  						b := a.Args[0]
    74  						a.reset(OpInvalid)
    75  						a = b
    76  					}
    77  				}
    78  
    79  				// apply rewrite function
    80  				if rv(v) {
    81  					change = true
    82  					// If value changed to a poor choice for a statement boundary, move the boundary
    83  					if v.Pos.IsStmt() == src.PosIsStmt {
    84  						if k := nextGoodStatementIndex(v, j, b); k != j {
    85  							v.Pos = v.Pos.WithNotStmt()
    86  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
    87  						}
    88  					}
    89  				}
    90  			}
    91  		}
    92  		if !change {
    93  			break
    94  		}
    95  	}
    96  	// remove clobbered values
    97  	for _, b := range f.Blocks {
    98  		j := 0
    99  		for i, v := range b.Values {
   100  			vl := v.Pos
   101  			if v.Op == OpInvalid {
   102  				if v.Pos.IsStmt() == src.PosIsStmt {
   103  					pendingLines.set(vl, int32(b.ID))
   104  				}
   105  				f.freeValue(v)
   106  				continue
   107  			}
   108  			if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
   109  				pendingLines.remove(vl)
   110  				v.Pos = v.Pos.WithIsStmt()
   111  			}
   112  			if i != j {
   113  				b.Values[j] = v
   114  			}
   115  			j++
   116  		}
   117  		if pendingLines.get(b.Pos) == int32(b.ID) {
   118  			b.Pos = b.Pos.WithIsStmt()
   119  			pendingLines.remove(b.Pos)
   120  		}
   121  		if j != len(b.Values) {
   122  			tail := b.Values[j:]
   123  			for j := range tail {
   124  				tail[j] = nil
   125  			}
   126  			b.Values = b.Values[:j]
   127  		}
   128  	}
   129  }
   130  
   131  // Common functions called from rewriting rules
   132  
   133  func is64BitFloat(t *types.Type) bool {
   134  	return t.Size() == 8 && t.IsFloat()
   135  }
   136  
   137  func is32BitFloat(t *types.Type) bool {
   138  	return t.Size() == 4 && t.IsFloat()
   139  }
   140  
   141  func is64BitInt(t *types.Type) bool {
   142  	return t.Size() == 8 && t.IsInteger()
   143  }
   144  
   145  func is32BitInt(t *types.Type) bool {
   146  	return t.Size() == 4 && t.IsInteger()
   147  }
   148  
   149  func is16BitInt(t *types.Type) bool {
   150  	return t.Size() == 2 && t.IsInteger()
   151  }
   152  
   153  func is8BitInt(t *types.Type) bool {
   154  	return t.Size() == 1 && t.IsInteger()
   155  }
   156  
   157  func isPtr(t *types.Type) bool {
   158  	return t.IsPtrShaped()
   159  }
   160  
   161  func isSigned(t *types.Type) bool {
   162  	return t.IsSigned()
   163  }
   164  
   165  // mergeSym merges two symbolic offsets. There is no real merging of
    166  // offsets; we just pick the non-nil one.
   167  func mergeSym(x, y interface{}) interface{} {
   168  	if x == nil {
   169  		return y
   170  	}
   171  	if y == nil {
   172  		return x
   173  	}
   174  	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
   175  }
   176  func canMergeSym(x, y interface{}) bool {
   177  	return x == nil || y == nil
   178  }
   179  
   180  // canMergeLoadClobber reports whether the load can be merged into target without
   181  // invalidating the schedule.
   182  // It also checks that the other non-load argument x is something we
   183  // are ok with clobbering.
   184  func canMergeLoadClobber(target, load, x *Value) bool {
   185  	// The register containing x is going to get clobbered.
   186  	// Don't merge if we still need the value of x.
   187  	// We don't have liveness information here, but we can
   188  	// approximate x dying with:
   189  	//  1) target is x's only use.
   190  	//  2) target is not in a deeper loop than x.
   191  	if x.Uses != 1 {
   192  		return false
   193  	}
   194  	loopnest := x.Block.Func.loopnest()
   195  	loopnest.calculateDepths()
   196  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   197  		return false
   198  	}
   199  	return canMergeLoad(target, load)
   200  }
   201  
   202  // canMergeLoad reports whether the load can be merged into target without
   203  // invalidating the schedule.
   204  func canMergeLoad(target, load *Value) bool {
   205  	if target.Block.ID != load.Block.ID {
   206  		// If the load is in a different block do not merge it.
   207  		return false
   208  	}
   209  
   210  	// We can't merge the load into the target if the load
   211  	// has more than one use.
   212  	if load.Uses != 1 {
   213  		return false
   214  	}
   215  
   216  	mem := load.MemoryArg()
   217  
   218  	// We need the load's memory arg to still be alive at target. That
   219  	// can't be the case if one of target's args depends on a memory
   220  	// state that is a successor of load's memory arg.
   221  	//
   222  	// For example, it would be invalid to merge load into target in
   223  	// the following situation because newmem has killed oldmem
   224  	// before target is reached:
   225  	//     load = read ... oldmem
   226  	//   newmem = write ... oldmem
   227  	//     arg0 = read ... newmem
   228  	//   target = add arg0 load
   229  	//
   230  	// If the argument comes from a different block then we can exclude
   231  	// it immediately because it must dominate load (which is in the
   232  	// same block as target).
   233  	var args []*Value
   234  	for _, a := range target.Args {
   235  		if a != load && a.Block.ID == target.Block.ID {
   236  			args = append(args, a)
   237  		}
   238  	}
   239  
   240  	// memPreds contains memory states known to be predecessors of load's
   241  	// memory state. It is lazily initialized.
   242  	var memPreds map[*Value]bool
   243  	for i := 0; len(args) > 0; i++ {
   244  		const limit = 100
   245  		if i >= limit {
   246  			// Give up if we have done a lot of iterations.
   247  			return false
   248  		}
   249  		v := args[len(args)-1]
   250  		args = args[:len(args)-1]
   251  		if target.Block.ID != v.Block.ID {
   252  			// Since target and load are in the same block
   253  			// we can stop searching when we leave the block.
   254  			continue
   255  		}
   256  		if v.Op == OpPhi {
   257  			// A Phi implies we have reached the top of the block.
   258  			// The memory phi, if it exists, is always
   259  			// the first logical store in the block.
   260  			continue
   261  		}
   262  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
    263  			// We could handle this situation, but it is likely
    264  			// to be very rare.
   265  			return false
   266  		}
   267  		if v.Op.SymEffect()&SymAddr != 0 {
   268  			// This case prevents an operation that calculates the
   269  			// address of a local variable from being forced to schedule
   270  			// before its corresponding VarDef.
   271  			// See issue 28445.
   272  			//   v1 = LOAD ...
   273  			//   v2 = VARDEF
   274  			//   v3 = LEAQ
   275  			//   v4 = CMPQ v1 v3
   276  			// We don't want to combine the CMPQ with the load, because
   277  			// that would force the CMPQ to schedule before the VARDEF, which
   278  			// in turn requires the LEAQ to schedule before the VARDEF.
   279  			return false
   280  		}
   281  		if v.Type.IsMemory() {
   282  			if memPreds == nil {
    283  				// Initialize a map containing memory states
   284  				// known to be predecessors of load's memory
   285  				// state.
   286  				memPreds = make(map[*Value]bool)
   287  				m := mem
   288  				const limit = 50
   289  				for i := 0; i < limit; i++ {
   290  					if m.Op == OpPhi {
   291  						// The memory phi, if it exists, is always
   292  						// the first logical store in the block.
   293  						break
   294  					}
   295  					if m.Block.ID != target.Block.ID {
   296  						break
   297  					}
   298  					if !m.Type.IsMemory() {
   299  						break
   300  					}
   301  					memPreds[m] = true
   302  					if len(m.Args) == 0 {
   303  						break
   304  					}
   305  					m = m.MemoryArg()
   306  				}
   307  			}
   308  
   309  			// We can merge if v is a predecessor of mem.
   310  			//
   311  			// For example, we can merge load into target in the
   312  			// following scenario:
   313  			//      x = read ... v
   314  			//    mem = write ... v
   315  			//   load = read ... mem
   316  			// target = add x load
   317  			if memPreds[v] {
   318  				continue
   319  			}
   320  			return false
   321  		}
   322  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   323  			// If v takes mem as an input then we know mem
   324  			// is valid at this point.
   325  			continue
   326  		}
   327  		for _, a := range v.Args {
   328  			if target.Block.ID == a.Block.ID {
   329  				args = append(args, a)
   330  			}
   331  		}
   332  	}
   333  
   334  	return true
   335  }
   336  
    337  // isSameSym reports whether sym is the same as the given named symbol.
   338  func isSameSym(sym interface{}, name string) bool {
   339  	s, ok := sym.(fmt.Stringer)
   340  	return ok && s.String() == name
   341  }
   342  
   343  // nlz returns the number of leading zeros.
   344  func nlz(x int64) int64 {
   345  	return int64(bits.LeadingZeros64(uint64(x)))
   346  }
   347  
   348  // ntz returns the number of trailing zeros.
   349  func ntz(x int64) int64 {
   350  	return int64(bits.TrailingZeros64(uint64(x)))
   351  }
   352  
   353  func oneBit(x int64) bool {
   354  	return bits.OnesCount64(uint64(x)) == 1
   355  }
   356  
   357  // nlo returns the number of leading ones.
   358  func nlo(x int64) int64 {
   359  	return nlz(^x)
   360  }
   361  
   362  // nto returns the number of trailing ones.
   363  func nto(x int64) int64 {
   364  	return ntz(^x)
   365  }
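         // Worked examples (illustrative, following the definitions above):
         // nlz(1) == 63, ntz(8) == 3, nto(0x7) == 3, and nlo(-1) == 64,
         // since ^(-1) == 0 and all 64 bits of 0 count as leading zeros.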
   366  
   367  // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
   368  // Rounds down.
   369  func log2(n int64) int64 {
   370  	return int64(bits.Len64(uint64(n))) - 1
   371  }
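         // For example, log2(8) == 3, log2(7) == 2 (rounded down), and
         // log2(0) == -1, since bits.Len64(0) == 0.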
   372  
   373  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   374  // Rounds down.
   375  func log2uint32(n int64) int64 {
   376  	return int64(bits.Len32(uint32(n))) - 1
   377  }
   378  
   379  // isPowerOfTwo reports whether n is a power of 2.
   380  func isPowerOfTwo(n int64) bool {
   381  	return n > 0 && n&(n-1) == 0
   382  }
   383  
   384  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   385  func isUint64PowerOfTwo(in int64) bool {
   386  	n := uint64(in)
   387  	return n > 0 && n&(n-1) == 0
   388  }
   389  
   390  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   391  func isUint32PowerOfTwo(in int64) bool {
   392  	n := uint64(uint32(in))
   393  	return n > 0 && n&(n-1) == 0
   394  }
   395  
   396  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   397  func is32Bit(n int64) bool {
   398  	return n == int64(int32(n))
   399  }
   400  
   401  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   402  func is16Bit(n int64) bool {
   403  	return n == int64(int16(n))
   404  }
   405  
   406  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   407  func isU12Bit(n int64) bool {
   408  	return 0 <= n && n < (1<<12)
   409  }
   410  
   411  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   412  func isU16Bit(n int64) bool {
   413  	return n == int64(uint16(n))
   414  }
   415  
   416  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   417  func isU32Bit(n int64) bool {
   418  	return n == int64(uint32(n))
   419  }
   420  
   421  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   422  func is20Bit(n int64) bool {
   423  	return -(1<<19) <= n && n < (1<<19)
   424  }
   425  
   426  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   427  func b2i(b bool) int64 {
   428  	if b {
   429  		return 1
   430  	}
   431  	return 0
   432  }
   433  
   434  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   435  // A shift is bounded if it is shifting by less than the width of the shifted value.
   436  func shiftIsBounded(v *Value) bool {
   437  	return v.AuxInt != 0
   438  }
   439  
   440  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   441  // of the mantissa. It will panic if the truncation results in lost information.
   442  func truncate64Fto32F(f float64) float32 {
   443  	if !isExactFloat32(f) {
   444  		panic("truncate64Fto32F: truncation is not exact")
   445  	}
   446  	if !math.IsNaN(f) {
   447  		return float32(f)
   448  	}
   449  	// NaN bit patterns aren't necessarily preserved across conversion
   450  	// instructions so we need to do the conversion manually.
   451  	b := math.Float64bits(f)
   452  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   453  	//          | sign                  | exponent   | mantissa       |
   454  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   455  	return math.Float32frombits(r)
   456  }
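         // A worked example of the NaN path: the quiet NaN 0x7ff8000000000000
         // becomes 0x7fc00000. The sign bit is kept, the exponent becomes all
         // ones, and the top 23 of the 52 mantissa bits are shifted down by
         // 52-23 = 29 bit positions.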
   457  
   458  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   459  // pattern of the mantissa.
   460  func extend32Fto64F(f float32) float64 {
   461  	if !math.IsNaN(float64(f)) {
   462  		return float64(f)
   463  	}
   464  	// NaN bit patterns aren't necessarily preserved across conversion
   465  	// instructions so we need to do the conversion manually.
   466  	b := uint64(math.Float32bits(f))
   467  	//   | sign                  | exponent      | mantissa                    |
   468  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   469  	return math.Float64frombits(r)
   470  }
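         // This is the inverse of the NaN path in truncate64Fto32F above:
         // 0x7fc00000 extends back to 0x7ff8000000000000, with the 23 mantissa
         // bits shifted up by 52-23 = 29 bit positions.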
   471  
   472  // NeedsFixUp reports whether the division needs fix-up code.
   473  func NeedsFixUp(v *Value) bool {
   474  	return v.AuxInt == 0
   475  }
   476  
   477  // i2f is used in rules for converting from an AuxInt to a float.
   478  func i2f(i int64) float64 {
   479  	return math.Float64frombits(uint64(i))
   480  }
   481  
   482  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   483  func auxFrom64F(f float64) int64 {
   484  	return int64(math.Float64bits(f))
   485  }
   486  
   487  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   488  func auxFrom32F(f float32) int64 {
   489  	return int64(math.Float64bits(extend32Fto64F(f)))
   490  }
   491  
   492  // auxTo32F decodes a float32 from the AuxInt value provided.
   493  func auxTo32F(i int64) float32 {
   494  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   495  }
   496  
   497  // auxTo64F decodes a float64 from the AuxInt value provided.
   498  func auxTo64F(i int64) float64 {
   499  	return math.Float64frombits(uint64(i))
   500  }
   501  
   502  // uaddOvf reports whether unsigned a+b would overflow.
   503  func uaddOvf(a, b int64) bool {
   504  	return uint64(a)+uint64(b) < uint64(a)
   505  }
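         // For example, uaddOvf(-1, 1) is true: uint64(-1) is 1<<64 - 1, so
         // adding 1 wraps around to 0, which is less than uint64(-1).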
   506  
    507  // devirt de-virtualizes an InterCall.
    508  // 'sym' is the symbol for the itab.
   509  func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
   510  	f := v.Block.Func
   511  	n, ok := sym.(*obj.LSym)
   512  	if !ok {
   513  		return nil
   514  	}
   515  	lsym := f.fe.DerefItab(n, offset)
   516  	if f.pass.debug > 0 {
   517  		if lsym != nil {
   518  			f.Warnl(v.Pos, "de-virtualizing call")
   519  		} else {
   520  			f.Warnl(v.Pos, "couldn't de-virtualize call")
   521  		}
   522  	}
   523  	return lsym
   524  }
   525  
   526  // isSamePtr reports whether p1 and p2 point to the same address.
   527  func isSamePtr(p1, p2 *Value) bool {
   528  	if p1 == p2 {
   529  		return true
   530  	}
   531  	if p1.Op != p2.Op {
   532  		return false
   533  	}
   534  	switch p1.Op {
   535  	case OpOffPtr:
   536  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   537  	case OpAddr, OpLocalAddr:
   538  		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
   539  		// Checking for value equality only works after [z]cse has run.
   540  		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
   541  	case OpAddPtr:
   542  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   543  	}
   544  	return false
   545  }
   546  
   547  func isStackPtr(v *Value) bool {
   548  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   549  		v = v.Args[0]
   550  	}
   551  	return v.Op == OpSP || v.Op == OpLocalAddr
   552  }
   553  
   554  // disjoint reports whether the memory region specified by [p1:p1+n1)
   555  // does not overlap with [p2:p2+n2).
   556  // A return value of false does not imply the regions overlap.
   557  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   558  	if n1 == 0 || n2 == 0 {
   559  		return true
   560  	}
   561  	if p1 == p2 {
   562  		return false
   563  	}
   564  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   565  		base, offset = ptr, 0
   566  		for base.Op == OpOffPtr {
   567  			offset += base.AuxInt
   568  			base = base.Args[0]
   569  		}
   570  		return base, offset
   571  	}
   572  	p1, off1 := baseAndOffset(p1)
   573  	p2, off2 := baseAndOffset(p2)
   574  	if isSamePtr(p1, p2) {
   575  		return !overlap(off1, n1, off2, n2)
   576  	}
   577  	// p1 and p2 are not the same, so if they are both OpAddrs then
   578  	// they point to different variables.
   579  	// If one pointer is on the stack and the other is an argument
   580  	// then they can't overlap.
   581  	switch p1.Op {
   582  	case OpAddr, OpLocalAddr:
   583  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   584  			return true
   585  		}
   586  		return p2.Op == OpArg && p1.Args[0].Op == OpSP
   587  	case OpArg:
   588  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   589  			return true
   590  		}
   591  	case OpSP:
   592  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
   593  	}
   594  	return false
   595  }
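         // For example, two OpOffPtr values off the same base pointer with
         // ranges [0,8) and [8,16) are disjoint: baseAndOffset strips the
         // offsets, isSamePtr matches the common base, and overlap(0, 8, 8, 8)
         // is false because the ranges only touch.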
   596  
    597  // moveSize returns the number of bytes an aligned MOV instruction moves.
   598  func moveSize(align int64, c *Config) int64 {
   599  	switch {
   600  	case align%8 == 0 && c.PtrSize == 8:
   601  		return 8
   602  	case align%4 == 0:
   603  		return 4
   604  	case align%2 == 0:
   605  		return 2
   606  	}
   607  	return 1
   608  }
   609  
   610  // mergePoint finds a block among a's blocks which dominates b and is itself
   611  // dominated by all of a's blocks. Returns nil if it can't find one.
   612  // Might return nil even if one does exist.
   613  func mergePoint(b *Block, a ...*Value) *Block {
   614  	// Walk backward from b looking for one of the a's blocks.
   615  
   616  	// Max distance
   617  	d := 100
   618  
   619  	for d > 0 {
   620  		for _, x := range a {
   621  			if b == x.Block {
   622  				goto found
   623  			}
   624  		}
   625  		if len(b.Preds) > 1 {
   626  			// Don't know which way to go back. Abort.
   627  			return nil
   628  		}
   629  		b = b.Preds[0].b
   630  		d--
   631  	}
   632  	return nil // too far away
   633  found:
   634  	// At this point, r is the first value in a that we find by walking backwards.
    635  	// If we return anything, r will be it.
   636  	r := b
   637  
   638  	// Keep going, counting the other a's that we find. They must all dominate r.
   639  	na := 0
   640  	for d > 0 {
   641  		for _, x := range a {
   642  			if b == x.Block {
   643  				na++
   644  			}
   645  		}
   646  		if na == len(a) {
   647  			// Found all of a in a backwards walk. We can return r.
   648  			return r
   649  		}
   650  		if len(b.Preds) > 1 {
   651  			return nil
   652  		}
   653  		b = b.Preds[0].b
   654  		d--
   655  
   656  	}
   657  	return nil // too far away
   658  }
   659  
   660  // clobber invalidates v.  Returns true.
   661  // clobber is used by rewrite rules to:
   662  //   A) make sure v is really dead and never used again.
   663  //   B) decrement use counts of v's args.
   664  func clobber(v *Value) bool {
   665  	v.reset(OpInvalid)
   666  	// Note: leave v.Block intact.  The Block field is used after clobber.
   667  	return true
   668  }
   669  
   670  // clobberIfDead resets v when use count is 1. Returns true.
   671  // clobberIfDead is used by rewrite rules to decrement
   672  // use counts of v's args when v is dead and never used.
   673  func clobberIfDead(v *Value) bool {
   674  	if v.Uses == 1 {
   675  		v.reset(OpInvalid)
   676  	}
   677  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   678  	return true
   679  }
   680  
   681  // noteRule is an easy way to track if a rule is matched when writing
   682  // new ones.  Make the rule of interest also conditional on
   683  //     noteRule("note to self: rule of interest matched")
   684  // and that message will print when the rule matches.
   685  func noteRule(s string) bool {
   686  	fmt.Println(s)
   687  	return true
   688  }
   689  
   690  // countRule increments Func.ruleMatches[key].
   691  // If Func.ruleMatches is non-nil at the end
   692  // of compilation, it will be printed to stdout.
    693  // This is intended to make it easier to find functions that
    694  // contain many rule matches when developing new rules.
   695  func countRule(v *Value, key string) bool {
   696  	f := v.Block.Func
   697  	if f.ruleMatches == nil {
   698  		f.ruleMatches = make(map[string]int)
   699  	}
   700  	f.ruleMatches[key]++
   701  	return true
   702  }
   703  
   704  // warnRule generates compiler debug output with string s when
   705  // v is not in autogenerated code, cond is true and the rule has fired.
   706  func warnRule(cond bool, v *Value, s string) bool {
   707  	if pos := v.Pos; pos.Line() > 1 && cond {
   708  		v.Block.Func.Warnl(pos, s)
   709  	}
   710  	return true
   711  }
   712  
    713  // flagArg extracts x from a pseudo-op like (LessThan x).
   714  func flagArg(v *Value) *Value {
   715  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
   716  		return nil
   717  	}
   718  	return v.Args[0]
   719  }
   720  
   721  // arm64Negate finds the complement to an ARM64 condition code,
   722  // for example Equal -> NotEqual or LessThan -> GreaterEqual
   725  func arm64Negate(op Op) Op {
   726  	switch op {
   727  	case OpARM64LessThan:
   728  		return OpARM64GreaterEqual
   729  	case OpARM64LessThanU:
   730  		return OpARM64GreaterEqualU
   731  	case OpARM64GreaterThan:
   732  		return OpARM64LessEqual
   733  	case OpARM64GreaterThanU:
   734  		return OpARM64LessEqualU
   735  	case OpARM64LessEqual:
   736  		return OpARM64GreaterThan
   737  	case OpARM64LessEqualU:
   738  		return OpARM64GreaterThanU
   739  	case OpARM64GreaterEqual:
   740  		return OpARM64LessThan
   741  	case OpARM64GreaterEqualU:
   742  		return OpARM64LessThanU
   743  	case OpARM64Equal:
   744  		return OpARM64NotEqual
   745  	case OpARM64NotEqual:
   746  		return OpARM64Equal
   747  	case OpARM64LessThanF:
   748  		return OpARM64GreaterEqualF
   749  	case OpARM64GreaterThanF:
   750  		return OpARM64LessEqualF
   751  	case OpARM64LessEqualF:
   752  		return OpARM64GreaterThanF
   753  	case OpARM64GreaterEqualF:
   754  		return OpARM64LessThanF
   755  	default:
   756  		panic("unreachable")
   757  	}
   758  }
   759  
   760  // arm64Invert evaluates (InvertFlags op), which
   761  // is the same as altering the condition codes such
   762  // that the same result would be produced if the arguments
   763  // to the flag-generating instruction were reversed, e.g.
   764  // (InvertFlags (CMP x y)) -> (CMP y x)
   767  func arm64Invert(op Op) Op {
   768  	switch op {
   769  	case OpARM64LessThan:
   770  		return OpARM64GreaterThan
   771  	case OpARM64LessThanU:
   772  		return OpARM64GreaterThanU
   773  	case OpARM64GreaterThan:
   774  		return OpARM64LessThan
   775  	case OpARM64GreaterThanU:
   776  		return OpARM64LessThanU
   777  	case OpARM64LessEqual:
   778  		return OpARM64GreaterEqual
   779  	case OpARM64LessEqualU:
   780  		return OpARM64GreaterEqualU
   781  	case OpARM64GreaterEqual:
   782  		return OpARM64LessEqual
   783  	case OpARM64GreaterEqualU:
   784  		return OpARM64LessEqualU
   785  	case OpARM64Equal, OpARM64NotEqual:
   786  		return op
   787  	case OpARM64LessThanF:
   788  		return OpARM64GreaterThanF
   789  	case OpARM64GreaterThanF:
   790  		return OpARM64LessThanF
   791  	case OpARM64LessEqualF:
   792  		return OpARM64GreaterEqualF
   793  	case OpARM64GreaterEqualF:
   794  		return OpARM64LessEqualF
   795  	default:
   796  		panic("unreachable")
   797  	}
   798  }
   799  
    800  // ccARM64Eval evaluates an ARM64 condition code op against a flags value
    801  // that is potentially constant; it returns 1 for true,
    802  // -1 for false, and 0 for not constant.
   803  func ccARM64Eval(cc interface{}, flags *Value) int {
   804  	op := cc.(Op)
   805  	fop := flags.Op
   806  	switch fop {
   807  	case OpARM64InvertFlags:
   808  		return -ccARM64Eval(op, flags.Args[0])
   809  	case OpARM64FlagEQ:
   810  		switch op {
   811  		case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
   812  			OpARM64GreaterEqualU, OpARM64LessEqualU:
   813  			return 1
   814  		default:
   815  			return -1
   816  		}
   817  	case OpARM64FlagLT_ULT:
   818  		switch op {
   819  		case OpARM64LessThan, OpARM64LessThanU,
   820  			OpARM64LessEqual, OpARM64LessEqualU:
   821  			return 1
   822  		default:
   823  			return -1
   824  		}
   825  	case OpARM64FlagLT_UGT:
   826  		switch op {
   827  		case OpARM64LessThan, OpARM64GreaterThanU,
   828  			OpARM64LessEqual, OpARM64GreaterEqualU:
   829  			return 1
   830  		default:
   831  			return -1
   832  		}
   833  	case OpARM64FlagGT_ULT:
   834  		switch op {
   835  		case OpARM64GreaterThan, OpARM64LessThanU,
   836  			OpARM64GreaterEqual, OpARM64LessEqualU:
   837  			return 1
   838  		default:
   839  			return -1
   840  		}
   841  	case OpARM64FlagGT_UGT:
   842  		switch op {
   843  		case OpARM64GreaterThan, OpARM64GreaterThanU,
   844  			OpARM64GreaterEqual, OpARM64GreaterEqualU:
   845  			return 1
   846  		default:
   847  			return -1
   848  		}
   849  	default:
   850  		return 0
   851  	}
   852  }
   853  
   854  // logRule logs the use of the rule s. This will only be enabled if
   855  // rewrite rules were generated with the -log option, see gen/rulegen.go.
   856  func logRule(s string) {
   857  	if ruleFile == nil {
   858  		// Open a log file to write log to. We open in append
   859  		// mode because all.bash runs the compiler lots of times,
   860  		// and we want the concatenation of all of those logs.
   861  		// This means, of course, that users need to rm the old log
   862  		// to get fresh data.
   863  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
   864  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
   865  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
   866  		if err != nil {
   867  			panic(err)
   868  		}
   869  		ruleFile = w
   870  	}
   871  	_, err := fmt.Fprintln(ruleFile, s)
   872  	if err != nil {
   873  		panic(err)
   874  	}
   875  }
   876  
   877  var ruleFile io.Writer
   878  
   879  func min(x, y int64) int64 {
   880  	if x < y {
   881  		return x
   882  	}
   883  	return y
   884  }
   885  
   886  func isConstZero(v *Value) bool {
   887  	switch v.Op {
   888  	case OpConstNil:
   889  		return true
   890  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
   891  		return v.AuxInt == 0
   892  	}
   893  	return false
   894  }
   895  
   896  // reciprocalExact64 reports whether 1/c is exactly representable.
   897  func reciprocalExact64(c float64) bool {
   898  	b := math.Float64bits(c)
   899  	man := b & (1<<52 - 1)
   900  	if man != 0 {
   901  		return false // not a power of 2, denormal, or NaN
   902  	}
   903  	exp := b >> 52 & (1<<11 - 1)
   904  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
   905  	// changes the exponent to 0x7fe-exp.
   906  	switch exp {
   907  	case 0:
   908  		return false // ±0
   909  	case 0x7ff:
   910  		return false // ±inf
   911  	case 0x7fe:
   912  		return false // exponent is not representable
   913  	default:
   914  		return true
   915  	}
   916  }
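         // For example, 1/2 is exact: man == 0, exp == 0x400, and the
         // reciprocal's exponent 0x7fe-0x400 is an ordinary one. But for
         // c == 2^1023 (exp == 0x7fe), the reciprocal's exponent would be 0,
         // i.e. a denormal, so it is rejected.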
   917  
   918  // reciprocalExact32 reports whether 1/c is exactly representable.
   919  func reciprocalExact32(c float32) bool {
   920  	b := math.Float32bits(c)
   921  	man := b & (1<<23 - 1)
   922  	if man != 0 {
   923  		return false // not a power of 2, denormal, or NaN
   924  	}
   925  	exp := b >> 23 & (1<<8 - 1)
   926  	// exponent bias is 0x7f.  So taking the reciprocal of a number
   927  	// changes the exponent to 0xfe-exp.
   928  	switch exp {
   929  	case 0:
   930  		return false // ±0
   931  	case 0xff:
   932  		return false // ±inf
   933  	case 0xfe:
   934  		return false // exponent is not representable
   935  	default:
   936  		return true
   937  	}
   938  }
   939  
    940  // isARMImmRot checks whether an immediate can be directly encoded into an ARM instruction.
   941  func isARMImmRot(v uint32) bool {
   942  	for i := 0; i < 16; i++ {
   943  		if v&^0xff == 0 {
   944  			return true
   945  		}
   946  		v = v<<2 | v>>30
   947  	}
   948  
   949  	return false
   950  }
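         // For example, 0xff000000 is encodable: four of the double-bit
         // rotations above bring it down to 0xff. 0x101 is not: its two set
         // bits stay 8 apart under any rotation, so they never both fit in
         // the low 8 bits.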
   951  
    952  // overlap reports whether the ranges specified by the given offset and
    953  // size pairs overlap.
   954  func overlap(offset1, size1, offset2, size2 int64) bool {
   955  	if offset1 >= offset2 && offset2+size2 > offset1 {
   956  		return true
   957  	}
   958  	if offset2 >= offset1 && offset1+size1 > offset2 {
   959  		return true
   960  	}
   961  	return false
   962  }
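         // For example, overlap(0, 8, 4, 4) is true (bytes 4 through 7 are
         // shared), while overlap(0, 4, 4, 4) is false (the ranges only touch).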
   963  
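         // areAdjacentOffsets reports whether the two size-byte regions at
         // off1 and off2 are adjacent, i.e. one starts exactly where the
         // other ends.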
   964  func areAdjacentOffsets(off1, off2, size int64) bool {
   965  	return off1+size == off2 || off1 == off2+size
   966  }
   967  
    968  // zeroUpper32Bits checks whether x zeroes out the upper 32 bits of a 64-bit register.
    969  // depth limits the recursion depth. AMD64.rules uses 3 as the limit,
    970  // because it catches the same number of cases as 4.
   971  func zeroUpper32Bits(x *Value, depth int) bool {
   972  	switch x.Op {
   973  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
   974  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
   975  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
   976  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
   977  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
   978  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
   979  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
   980  		return true
   981  	case OpArg:
   982  		return x.Type.Width == 4
   983  	case OpPhi, OpSelect0, OpSelect1:
    984  		// Phis can use each other as arguments; instead of tracking
    985  		// visited values, just limit the recursion depth.
   986  		if depth <= 0 {
   987  			return false
   988  		}
   989  		for i := range x.Args {
   990  			if !zeroUpper32Bits(x.Args[i], depth-1) {
   991  				return false
   992  			}
   993  		}
   994  		return true
   995  
   996  	}
   997  	return false
   998  }
   999  
  1000  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
  1001  func zeroUpper48Bits(x *Value, depth int) bool {
  1002  	switch x.Op {
  1003  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1004  		return true
  1005  	case OpArg:
  1006  		return x.Type.Width == 2
  1007  	case OpPhi, OpSelect0, OpSelect1:
   1008  		// Phis can use each other as arguments; instead of tracking
   1009  		// visited values, just limit the recursion depth.
  1010  		if depth <= 0 {
  1011  			return false
  1012  		}
  1013  		for i := range x.Args {
  1014  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1015  				return false
  1016  			}
  1017  		}
  1018  		return true
  1019  
  1020  	}
  1021  	return false
  1022  }
  1023  
  1024  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
  1025  func zeroUpper56Bits(x *Value, depth int) bool {
  1026  	switch x.Op {
  1027  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1028  		return true
  1029  	case OpArg:
  1030  		return x.Type.Width == 1
  1031  	case OpPhi, OpSelect0, OpSelect1:
   1032  		// Phis can use each other as arguments; instead of tracking
   1033  		// visited values, just limit the recursion depth.
  1034  		if depth <= 0 {
  1035  			return false
  1036  		}
  1037  		for i := range x.Args {
  1038  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1039  				return false
  1040  			}
  1041  		}
  1042  		return true
  1043  
  1044  	}
  1045  	return false
  1046  }
  1047  
  1048  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1049  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1050  // safe, either because Move is small or because the arguments are disjoint.
  1051  // This is used as a check for replacing memmove with Move ops.
  1052  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1053  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1054  	// Move ops may or may not be faster for large sizes depending on how the platform
  1055  	// lowers them, so we only perform this optimization on platforms that we know to
  1056  	// have fast Move ops.
  1057  	switch c.arch {
  1058  	case "amd64", "amd64p32":
  1059  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1060  	case "386", "ppc64", "ppc64le", "arm64":
  1061  		return sz <= 8
  1062  	case "s390x":
  1063  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1064  	case "arm", "mips", "mips64", "mipsle", "mips64le":
  1065  		return sz <= 4
  1066  	}
  1067  	return false
  1068  }
  1069  
  1070  // hasSmallRotate reports whether the architecture has rotate instructions
  1071  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1072  func hasSmallRotate(c *Config) bool {
  1073  	switch c.arch {
  1074  	case "amd64", "amd64p32", "386":
  1075  		return true
  1076  	default:
  1077  		return false
  1078  	}
  1079  }
  1080  
   1081  // armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1082  func armBFAuxInt(lsb, width int64) int64 {
  1083  	if lsb < 0 || lsb > 63 {
  1084  		panic("ARM(64) bit field lsb constant out of range")
  1085  	}
  1086  	if width < 1 || width > 64 {
  1087  		panic("ARM(64) bit field width constant out of range")
  1088  	}
  1089  	return width | lsb<<8
  1090  }
  1091  
   1092  // getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
  1093  func getARM64BFlsb(bfc int64) int64 {
  1094  	return int64(uint64(bfc) >> 8)
  1095  }
  1096  
   1097  // getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
  1098  func getARM64BFwidth(bfc int64) int64 {
  1099  	return bfc & 0xff
  1100  }
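         // For example, armBFAuxInt(8, 16) == 16 | 8<<8 == 0x810, which decodes
         // back via getARM64BFlsb(0x810) == 8 and getARM64BFwidth(0x810) == 16.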
  1101  
   1102  // isARM64BFMask reports whether mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1103  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1104  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1105  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1106  }
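         // For example, isARM64BFMask(4, 0xff0, 4) is true: the shifted mask
         // is 0xff, 0xff+1 is a power of two, and nto(0xff)+4 == 12 < 64.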
  1107  
   1108  // arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1109  func arm64BFWidth(mask, rshift int64) int64 {
  1110  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1111  	if shiftedMask == 0 {
  1112  		panic("ARM64 BF mask is zero")
  1113  	}
  1114  	return nto(shiftedMask)
  1115  }
  1116  
  1117  // sizeof returns the size of t in bytes.
  1118  // It will panic if t is not a *types.Type.
  1119  func sizeof(t interface{}) int64 {
  1120  	return t.(*types.Type).Size()
  1121  }
  1122  
  1123  // alignof returns the alignment of t in bytes.
  1124  // It will panic if t is not a *types.Type.
  1125  func alignof(t interface{}) int64 {
  1126  	return t.(*types.Type).Alignment()
  1127  }
  1128  
  1129  // registerizable reports whether t is a primitive type that fits in
  1130  // a register. It assumes float64 values will always fit into registers
  1131  // even if that isn't strictly true.
  1132  // It will panic if t is not a *types.Type.
  1133  func registerizable(b *Block, t interface{}) bool {
  1134  	typ := t.(*types.Type)
  1135  	if typ.IsPtrShaped() || typ.IsFloat() {
  1136  		return true
  1137  	}
  1138  	if typ.IsInteger() {
  1139  		return typ.Size() <= b.Func.Config.RegSize
  1140  	}
  1141  	return false
  1142  }
  1143  
   1144  // needRaceCleanup reports whether this call to racefuncenter/racefuncexit can be removed.
  1145  func needRaceCleanup(sym interface{}, v *Value) bool {
  1146  	f := v.Block.Func
  1147  	if !f.Config.Race {
  1148  		return false
  1149  	}
  1150  	if !isSameSym(sym, "runtime.racefuncenter") && !isSameSym(sym, "runtime.racefuncexit") {
  1151  		return false
  1152  	}
  1153  	for _, b := range f.Blocks {
  1154  		for _, v := range b.Values {
  1155  			switch v.Op {
  1156  			case OpStaticCall:
   1157  				// The scan will encounter racefuncenter and racefuncexit themselves;
   1158  				// allow them, along with calls to panic*.
  1159  				s := v.Aux.(fmt.Stringer).String()
  1160  				switch s {
  1161  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1162  					"runtime.panicdivide", "runtime.panicwrap",
  1163  					"runtime.panicshift":
  1164  					continue
  1165  				}
  1166  				// If we encountered any call, we need to keep racefunc*,
  1167  				// for accurate stacktraces.
  1168  				return false
  1169  			case OpPanicBounds, OpPanicExtend:
  1170  				// Note: these are panic generators that are ok (like the static calls above).
  1171  			case OpClosureCall, OpInterCall:
  1172  				// We must keep the race functions if there are any other call types.
  1173  				return false
  1174  			}
  1175  		}
  1176  	}
  1177  	return true
  1178  }
  1179  
  1180  // symIsRO reports whether sym is a read-only global.
  1181  func symIsRO(sym interface{}) bool {
  1182  	lsym := sym.(*obj.LSym)
  1183  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1184  }
  1185  
  1186  // read8 reads one byte from the read-only global sym at offset off.
  1187  func read8(sym interface{}, off int64) uint8 {
  1188  	lsym := sym.(*obj.LSym)
  1189  	if off >= int64(len(lsym.P)) || off < 0 {
  1190  		// Invalid index into the global sym.
  1191  		// This can happen in dead code, so we don't want to panic.
  1192  		// Just return any value, it will eventually get ignored.
  1193  		// See issue 29215.
  1194  		return 0
  1195  	}
  1196  	return lsym.P[off]
  1197  }
  1198  
  1199  // read16 reads two bytes from the read-only global sym at offset off.
  1200  func read16(sym interface{}, off int64, bigEndian bool) uint16 {
  1201  	lsym := sym.(*obj.LSym)
  1202  	if off >= int64(len(lsym.P))-1 || off < 0 {
  1203  		return 0
  1204  	}
  1205  	if bigEndian {
  1206  		return binary.BigEndian.Uint16(lsym.P[off:])
  1207  	} else {
  1208  		return binary.LittleEndian.Uint16(lsym.P[off:])
  1209  	}
  1210  }
  1211  
  1212  // read32 reads four bytes from the read-only global sym at offset off.
  1213  func read32(sym interface{}, off int64, bigEndian bool) uint32 {
  1214  	lsym := sym.(*obj.LSym)
  1215  	if off >= int64(len(lsym.P))-3 || off < 0 {
  1216  		return 0
  1217  	}
  1218  	if bigEndian {
  1219  		return binary.BigEndian.Uint32(lsym.P[off:])
  1220  	} else {
  1221  		return binary.LittleEndian.Uint32(lsym.P[off:])
  1222  	}
  1223  }
  1224  
  1225  // read64 reads eight bytes from the read-only global sym at offset off.
  1226  func read64(sym interface{}, off int64, bigEndian bool) uint64 {
  1227  	lsym := sym.(*obj.LSym)
  1228  	if off >= int64(len(lsym.P))-7 || off < 0 {
  1229  		return 0
  1230  	}
  1231  	if bigEndian {
  1232  		return binary.BigEndian.Uint64(lsym.P[off:])
  1233  	} else {
  1234  		return binary.LittleEndian.Uint64(lsym.P[off:])
  1235  	}
  1236  }
  1237  
