Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/objabi"
    11  	"cmd/internal/src"
    12  	"encoding/binary"
    13  	"fmt"
    14  	"io"
    15  	"math"
    16  	"math/bits"
    17  	"os"
    18  	"path/filepath"
    19  )
    20  
    21  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
    22  	// repeat rewrites until we find no more rewrites
    23  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    24  	pendingLines.clear()
    25  	for {
    26  		change := false
    27  		for _, b := range f.Blocks {
    28  			if b.Control != nil && b.Control.Op == OpCopy {
    29  				for b.Control.Op == OpCopy {
    30  					b.SetControl(b.Control.Args[0])
    31  				}
    32  			}
    33  			if rb(b) {
    34  				change = true
    35  			}
    36  			for j, v := range b.Values {
    37  				change = phielimValue(v) || change
    38  
    39  				// Eliminate copy inputs.
    40  				// If any copy input becomes unused, mark it
    41  				// as invalid and discard its argument. Repeat
    42  				// recursively on the discarded argument.
    43  				// This phase helps remove phantom "dead copy" uses
     44  				// of a value so that an x.Uses==1 rule condition
    45  				// fires reliably.
    46  				for i, a := range v.Args {
    47  					if a.Op != OpCopy {
    48  						continue
    49  					}
    50  					aa := copySource(a)
    51  					v.SetArg(i, aa)
    52  					// If a, a copy, has a line boundary indicator, attempt to find a new value
    53  					// to hold it.  The first candidate is the value that will replace a (aa),
    54  					// if it shares the same block and line and is eligible.
     55  					// The second option is v, which uses a as an input.  Because aa is earlier in
    56  					// the data flow, it is the better choice.
    57  					if a.Pos.IsStmt() == src.PosIsStmt {
    58  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
    59  							aa.Pos = aa.Pos.WithIsStmt()
    60  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
    61  							v.Pos = v.Pos.WithIsStmt()
    62  						} else {
    63  							// Record the lost line and look for a new home after all rewrites are complete.
    64  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
    65  							// line to appear in more than one block, but only one block is stored, so if both end
    66  							// up here, then one will be lost.
    67  							pendingLines.set(a.Pos, int32(a.Block.ID))
    68  						}
    69  						a.Pos = a.Pos.WithNotStmt()
    70  					}
    71  					change = true
    72  					for a.Uses == 0 {
    73  						b := a.Args[0]
    74  						a.reset(OpInvalid)
    75  						a = b
    76  					}
    77  				}
    78  
    79  				// apply rewrite function
    80  				if rv(v) {
    81  					change = true
    82  					// If value changed to a poor choice for a statement boundary, move the boundary
    83  					if v.Pos.IsStmt() == src.PosIsStmt {
    84  						if k := nextGoodStatementIndex(v, j, b); k != j {
    85  							v.Pos = v.Pos.WithNotStmt()
    86  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
    87  						}
    88  					}
    89  				}
    90  			}
    91  		}
    92  		if !change {
    93  			break
    94  		}
    95  	}
    96  	// remove clobbered values
    97  	for _, b := range f.Blocks {
    98  		j := 0
    99  		for i, v := range b.Values {
   100  			vl := v.Pos
   101  			if v.Op == OpInvalid {
   102  				if v.Pos.IsStmt() == src.PosIsStmt {
   103  					pendingLines.set(vl, int32(b.ID))
   104  				}
   105  				f.freeValue(v)
   106  				continue
   107  			}
   108  			if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
   109  				pendingLines.remove(vl)
   110  				v.Pos = v.Pos.WithIsStmt()
   111  			}
   112  			if i != j {
   113  				b.Values[j] = v
   114  			}
   115  			j++
   116  		}
   117  		if pendingLines.get(b.Pos) == int32(b.ID) {
   118  			b.Pos = b.Pos.WithIsStmt()
   119  			pendingLines.remove(b.Pos)
   120  		}
   121  		if j != len(b.Values) {
   122  			tail := b.Values[j:]
   123  			for j := range tail {
   124  				tail[j] = nil
   125  			}
   126  			b.Values = b.Values[:j]
   127  		}
   128  	}
   129  }
   130  
   131  // Common functions called from rewriting rules
   132  
   133  func is64BitFloat(t *types.Type) bool {
   134  	return t.Size() == 8 && t.IsFloat()
   135  }
   136  
   137  func is32BitFloat(t *types.Type) bool {
   138  	return t.Size() == 4 && t.IsFloat()
   139  }
   140  
   141  func is64BitInt(t *types.Type) bool {
   142  	return t.Size() == 8 && t.IsInteger()
   143  }
   144  
   145  func is32BitInt(t *types.Type) bool {
   146  	return t.Size() == 4 && t.IsInteger()
   147  }
   148  
   149  func is16BitInt(t *types.Type) bool {
   150  	return t.Size() == 2 && t.IsInteger()
   151  }
   152  
   153  func is8BitInt(t *types.Type) bool {
   154  	return t.Size() == 1 && t.IsInteger()
   155  }
   156  
   157  func isPtr(t *types.Type) bool {
   158  	return t.IsPtrShaped()
   159  }
   160  
   161  func isSigned(t *types.Type) bool {
   162  	return t.IsSigned()
   163  }
   164  
   165  // mergeSym merges two symbolic offsets. There is no real merging of
    166  // offsets; we just pick the non-nil one.
   167  func mergeSym(x, y interface{}) interface{} {
   168  	if x == nil {
   169  		return y
   170  	}
   171  	if y == nil {
   172  		return x
   173  	}
   174  	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
   175  }
   176  func canMergeSym(x, y interface{}) bool {
   177  	return x == nil || y == nil
   178  }
   179  
   180  // canMergeLoadClobber reports whether the load can be merged into target without
   181  // invalidating the schedule.
   182  // It also checks that the other non-load argument x is something we
   183  // are ok with clobbering.
   184  func canMergeLoadClobber(target, load, x *Value) bool {
   185  	// The register containing x is going to get clobbered.
   186  	// Don't merge if we still need the value of x.
   187  	// We don't have liveness information here, but we can
   188  	// approximate x dying with:
   189  	//  1) target is x's only use.
   190  	//  2) target is not in a deeper loop than x.
   191  	if x.Uses != 1 {
   192  		return false
   193  	}
   194  	loopnest := x.Block.Func.loopnest()
   195  	loopnest.calculateDepths()
   196  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   197  		return false
   198  	}
   199  	return canMergeLoad(target, load)
   200  }
   201  
   202  // canMergeLoad reports whether the load can be merged into target without
   203  // invalidating the schedule.
   204  func canMergeLoad(target, load *Value) bool {
   205  	if target.Block.ID != load.Block.ID {
   206  		// If the load is in a different block do not merge it.
   207  		return false
   208  	}
   209  
   210  	// We can't merge the load into the target if the load
   211  	// has more than one use.
   212  	if load.Uses != 1 {
   213  		return false
   214  	}
   215  
   216  	mem := load.MemoryArg()
   217  
   218  	// We need the load's memory arg to still be alive at target. That
   219  	// can't be the case if one of target's args depends on a memory
   220  	// state that is a successor of load's memory arg.
   221  	//
   222  	// For example, it would be invalid to merge load into target in
   223  	// the following situation because newmem has killed oldmem
   224  	// before target is reached:
   225  	//     load = read ... oldmem
   226  	//   newmem = write ... oldmem
   227  	//     arg0 = read ... newmem
   228  	//   target = add arg0 load
   229  	//
   230  	// If the argument comes from a different block then we can exclude
   231  	// it immediately because it must dominate load (which is in the
   232  	// same block as target).
   233  	var args []*Value
   234  	for _, a := range target.Args {
   235  		if a != load && a.Block.ID == target.Block.ID {
   236  			args = append(args, a)
   237  		}
   238  	}
   239  
   240  	// memPreds contains memory states known to be predecessors of load's
   241  	// memory state. It is lazily initialized.
   242  	var memPreds map[*Value]bool
   243  	for i := 0; len(args) > 0; i++ {
   244  		const limit = 100
   245  		if i >= limit {
   246  			// Give up if we have done a lot of iterations.
   247  			return false
   248  		}
   249  		v := args[len(args)-1]
   250  		args = args[:len(args)-1]
   251  		if target.Block.ID != v.Block.ID {
   252  			// Since target and load are in the same block
   253  			// we can stop searching when we leave the block.
   254  			continue
   255  		}
   256  		if v.Op == OpPhi {
   257  			// A Phi implies we have reached the top of the block.
   258  			// The memory phi, if it exists, is always
   259  			// the first logical store in the block.
   260  			continue
   261  		}
   262  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
    263  			// We could handle this situation, but it is likely
    264  			// to be very rare.
   265  			return false
   266  		}
   267  		if v.Op.SymEffect()&SymAddr != 0 {
   268  			// This case prevents an operation that calculates the
   269  			// address of a local variable from being forced to schedule
   270  			// before its corresponding VarDef.
   271  			// See issue 28445.
   272  			//   v1 = LOAD ...
   273  			//   v2 = VARDEF
   274  			//   v3 = LEAQ
   275  			//   v4 = CMPQ v1 v3
   276  			// We don't want to combine the CMPQ with the load, because
   277  			// that would force the CMPQ to schedule before the VARDEF, which
   278  			// in turn requires the LEAQ to schedule before the VARDEF.
   279  			return false
   280  		}
   281  		if v.Type.IsMemory() {
   282  			if memPreds == nil {
    283  				// Initialize a map containing memory states
   284  				// known to be predecessors of load's memory
   285  				// state.
   286  				memPreds = make(map[*Value]bool)
   287  				m := mem
   288  				const limit = 50
   289  				for i := 0; i < limit; i++ {
   290  					if m.Op == OpPhi {
   291  						// The memory phi, if it exists, is always
   292  						// the first logical store in the block.
   293  						break
   294  					}
   295  					if m.Block.ID != target.Block.ID {
   296  						break
   297  					}
   298  					if !m.Type.IsMemory() {
   299  						break
   300  					}
   301  					memPreds[m] = true
   302  					if len(m.Args) == 0 {
   303  						break
   304  					}
   305  					m = m.MemoryArg()
   306  				}
   307  			}
   308  
   309  			// We can merge if v is a predecessor of mem.
   310  			//
   311  			// For example, we can merge load into target in the
   312  			// following scenario:
   313  			//      x = read ... v
   314  			//    mem = write ... v
   315  			//   load = read ... mem
   316  			// target = add x load
   317  			if memPreds[v] {
   318  				continue
   319  			}
   320  			return false
   321  		}
   322  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   323  			// If v takes mem as an input then we know mem
   324  			// is valid at this point.
   325  			continue
   326  		}
   327  		for _, a := range v.Args {
   328  			if target.Block.ID == a.Block.ID {
   329  				args = append(args, a)
   330  			}
   331  		}
   332  	}
   333  
   334  	return true
   335  }
   336  
    337  // isSameSym reports whether sym is the same as the given named symbol.
   338  func isSameSym(sym interface{}, name string) bool {
   339  	s, ok := sym.(fmt.Stringer)
   340  	return ok && s.String() == name
   341  }
   342  
   343  // nlz returns the number of leading zeros.
   344  func nlz(x int64) int64 {
   345  	return int64(bits.LeadingZeros64(uint64(x)))
   346  }
   347  
   348  // ntz returns the number of trailing zeros.
   349  func ntz(x int64) int64 {
   350  	return int64(bits.TrailingZeros64(uint64(x)))
   351  }
   352  
   353  func oneBit(x int64) bool {
   354  	return bits.OnesCount64(uint64(x)) == 1
   355  }
   356  
   357  // nlo returns the number of leading ones.
   358  func nlo(x int64) int64 {
   359  	return nlz(^x)
   360  }
   361  
   362  // nto returns the number of trailing ones.
   363  func nto(x int64) int64 {
   364  	return ntz(^x)
   365  }
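         // Worked examples (illustrative, following the definitions above):
         // nlz(1) == 63, ntz(8) == 3, nto(0x7) == 3, and nlo(-1) == 64,
         // since ^(-1) == 0 and all 64 bits of 0 count as leading zeros.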
   366  
   367  // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
   368  // Rounds down.
   369  func log2(n int64) int64 {
   370  	return int64(bits.Len64(uint64(n))) - 1
   371  }
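         // For example, log2(8) == 3, log2(7) == 2 (rounded down), and
         // log2(0) == -1, since bits.Len64(0) == 0.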
   372  
   373  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   374  // Rounds down.
   375  func log2uint32(n int64) int64 {
   376  	return int64(bits.Len32(uint32(n))) - 1
   377  }
   378  
   379  // isPowerOfTwo reports whether n is a power of 2.
   380  func isPowerOfTwo(n int64) bool {
   381  	return n > 0 && n&(n-1) == 0
   382  }
   383  
   384  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   385  func isUint64PowerOfTwo(in int64) bool {
   386  	n := uint64(in)
   387  	return n > 0 && n&(n-1) == 0
   388  }
   389  
   390  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   391  func isUint32PowerOfTwo(in int64) bool {
   392  	n := uint64(uint32(in))
   393  	return n > 0 && n&(n-1) == 0
   394  }
   395  
   396  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   397  func is32Bit(n int64) bool {
   398  	return n == int64(int32(n))
   399  }
   400  
   401  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   402  func is16Bit(n int64) bool {
   403  	return n == int64(int16(n))
   404  }
   405  
   406  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   407  func isU12Bit(n int64) bool {
   408  	return 0 <= n && n < (1<<12)
   409  }
   410  
   411  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   412  func isU16Bit(n int64) bool {
   413  	return n == int64(uint16(n))
   414  }
   415  
   416  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   417  func isU32Bit(n int64) bool {
   418  	return n == int64(uint32(n))
   419  }
   420  
   421  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   422  func is20Bit(n int64) bool {
   423  	return -(1<<19) <= n && n < (1<<19)
   424  }
   425  
   426  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   427  func b2i(b bool) int64 {
   428  	if b {
   429  		return 1
   430  	}
   431  	return 0
   432  }
   433  
   434  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   435  // A shift is bounded if it is shifting by less than the width of the shifted value.
   436  func shiftIsBounded(v *Value) bool {
   437  	return v.AuxInt != 0
   438  }
   439  
   440  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   441  // of the mantissa. It will panic if the truncation results in lost information.
   442  func truncate64Fto32F(f float64) float32 {
   443  	if !isExactFloat32(f) {
   444  		panic("truncate64Fto32F: truncation is not exact")
   445  	}
   446  	if !math.IsNaN(f) {
   447  		return float32(f)
   448  	}
   449  	// NaN bit patterns aren't necessarily preserved across conversion
   450  	// instructions so we need to do the conversion manually.
   451  	b := math.Float64bits(f)
   452  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   453  	//          | sign                  | exponent   | mantissa       |
   454  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   455  	return math.Float32frombits(r)
   456  }
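         // A worked example of the NaN path: the quiet NaN 0x7ff8000000000000
         // becomes 0x7fc00000. The sign bit is kept, the exponent becomes all
         // ones, and the top 23 of the 52 mantissa bits are shifted down by
         // 52-23 = 29 bit positions.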
   457  
   458  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   459  // pattern of the mantissa.
   460  func extend32Fto64F(f float32) float64 {
   461  	if !math.IsNaN(float64(f)) {
   462  		return float64(f)
   463  	}
   464  	// NaN bit patterns aren't necessarily preserved across conversion
   465  	// instructions so we need to do the conversion manually.
   466  	b := uint64(math.Float32bits(f))
   467  	//   | sign                  | exponent      | mantissa                    |
   468  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   469  	return math.Float64frombits(r)
   470  }
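         // This is the inverse of the NaN path in truncate64Fto32F above:
         // 0x7fc00000 extends back to 0x7ff8000000000000, with the 23 mantissa
         // bits shifted up by 52-23 = 29 bit positions.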
   471  
   472  // NeedsFixUp reports whether the division needs fix-up code.
   473  func NeedsFixUp(v *Value) bool {
   474  	return v.AuxInt == 0
   475  }
   476  
   477  // i2f is used in rules for converting from an AuxInt to a float.
   478  func i2f(i int64) float64 {
   479  	return math.Float64frombits(uint64(i))
   480  }
   481  
   482  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   483  func auxFrom64F(f float64) int64 {
   484  	return int64(math.Float64bits(f))
   485  }
   486  
   487  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   488  func auxFrom32F(f float32) int64 {
   489  	return int64(math.Float64bits(extend32Fto64F(f)))
   490  }
   491  
   492  // auxTo32F decodes a float32 from the AuxInt value provided.
   493  func auxTo32F(i int64) float32 {
   494  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   495  }
   496  
   497  // auxTo64F decodes a float64 from the AuxInt value provided.
   498  func auxTo64F(i int64) float64 {
   499  	return math.Float64frombits(uint64(i))
   500  }
   501  
   502  // uaddOvf reports whether unsigned a+b would overflow.
   503  func uaddOvf(a, b int64) bool {
   504  	return uint64(a)+uint64(b) < uint64(a)
   505  }
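         // For example, uaddOvf(-1, 1) is true: uint64(-1) is 1<<64 - 1, so
         // adding 1 wraps around to 0, which is less than uint64(-1).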
   506  
    507  // devirt de-virtualizes an InterCall.
    508  // 'sym' is the symbol for the itab.
   509  func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
   510  	f := v.Block.Func
   511  	n, ok := sym.(*obj.LSym)
   512  	if !ok {
   513  		return nil
   514  	}
   515  	lsym := f.fe.DerefItab(n, offset)
   516  	if f.pass.debug > 0 {
   517  		if lsym != nil {
   518  			f.Warnl(v.Pos, "de-virtualizing call")
   519  		} else {
   520  			f.Warnl(v.Pos, "couldn't de-virtualize call")
   521  		}
   522  	}
   523  	return lsym
   524  }
   525  
   526  // isSamePtr reports whether p1 and p2 point to the same address.
   527  func isSamePtr(p1, p2 *Value) bool {
   528  	if p1 == p2 {
   529  		return true
   530  	}
   531  	if p1.Op != p2.Op {
   532  		return false
   533  	}
   534  	switch p1.Op {
   535  	case OpOffPtr:
   536  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   537  	case OpAddr, OpLocalAddr:
   538  		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
   539  		// Checking for value equality only works after [z]cse has run.
   540  		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
   541  	case OpAddPtr:
   542  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   543  	}
   544  	return false
   545  }
   546  
   547  func isStackPtr(v *Value) bool {
   548  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   549  		v = v.Args[0]
   550  	}
   551  	return v.Op == OpSP || v.Op == OpLocalAddr
   552  }
   553  
   554  // disjoint reports whether the memory region specified by [p1:p1+n1)
   555  // does not overlap with [p2:p2+n2).
   556  // A return value of false does not imply the regions overlap.
   557  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   558  	if n1 == 0 || n2 == 0 {
   559  		return true
   560  	}
   561  	if p1 == p2 {
   562  		return false
   563  	}
   564  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   565  		base, offset = ptr, 0
   566  		for base.Op == OpOffPtr {
   567  			offset += base.AuxInt
   568  			base = base.Args[0]
   569  		}
   570  		return base, offset
   571  	}
   572  	p1, off1 := baseAndOffset(p1)
   573  	p2, off2 := baseAndOffset(p2)
   574  	if isSamePtr(p1, p2) {
   575  		return !overlap(off1, n1, off2, n2)
   576  	}
   577  	// p1 and p2 are not the same, so if they are both OpAddrs then
   578  	// they point to different variables.
   579  	// If one pointer is on the stack and the other is an argument
   580  	// then they can't overlap.
   581  	switch p1.Op {
   582  	case OpAddr, OpLocalAddr:
   583  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   584  			return true
   585  		}
   586  		return p2.Op == OpArg && p1.Args[0].Op == OpSP
   587  	case OpArg:
   588  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   589  			return true
   590  		}
   591  	case OpSP:
   592  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
   593  	}
   594  	return false
   595  }
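         // For example, two OpOffPtr values off the same base pointer with
         // ranges [0,8) and [8,16) are disjoint: baseAndOffset strips the
         // offsets, isSamePtr matches the common base, and overlap(0, 8, 8, 8)
         // is false because the ranges only touch.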
   596  
    597  // moveSize returns the number of bytes an aligned MOV instruction moves.
   598  func moveSize(align int64, c *Config) int64 {
   599  	switch {
   600  	case align%8 == 0 && c.PtrSize == 8:
   601  		return 8
   602  	case align%4 == 0:
   603  		return 4
   604  	case align%2 == 0:
   605  		return 2
   606  	}
   607  	return 1
   608  }
   609  
   610  // mergePoint finds a block among a's blocks which dominates b and is itself
   611  // dominated by all of a's blocks. Returns nil if it can't find one.
   612  // Might return nil even if one does exist.
   613  func mergePoint(b *Block, a ...*Value) *Block {
   614  	// Walk backward from b looking for one of the a's blocks.
   615  
   616  	// Max distance
   617  	d := 100
   618  
   619  	for d > 0 {
   620  		for _, x := range a {
   621  			if b == x.Block {
   622  				goto found
   623  			}
   624  		}
   625  		if len(b.Preds) > 1 {
   626  			// Don't know which way to go back. Abort.
   627  			return nil
   628  		}
   629  		b = b.Preds[0].b
   630  		d--
   631  	}
   632  	return nil // too far away
   633  found:
   634  	// At this point, r is the first value in a that we find by walking backwards.
    635  	// If we return anything, r will be it.
   636  	r := b
   637  
   638  	// Keep going, counting the other a's that we find. They must all dominate r.
   639  	na := 0
   640  	for d > 0 {
   641  		for _, x := range a {
   642  			if b == x.Block {
   643  				na++
   644  			}
   645  		}
   646  		if na == len(a) {
   647  			// Found all of a in a backwards walk. We can return r.
   648  			return r
   649  		}
   650  		if len(b.Preds) > 1 {
   651  			return nil
   652  		}
   653  		b = b.Preds[0].b
   654  		d--
   655  
   656  	}
   657  	return nil // too far away
   658  }
   659  
   660  // clobber invalidates v.  Returns true.
   661  // clobber is used by rewrite rules to:
   662  //   A) make sure v is really dead and never used again.
   663  //   B) decrement use counts of v's args.
   664  func clobber(v *Value) bool {
   665  	v.reset(OpInvalid)
   666  	// Note: leave v.Block intact.  The Block field is used after clobber.
   667  	return true
   668  }
   669  
   670  // clobberIfDead resets v when use count is 1. Returns true.
   671  // clobberIfDead is used by rewrite rules to decrement
   672  // use counts of v's args when v is dead and never used.
   673  func clobberIfDead(v *Value) bool {
   674  	if v.Uses == 1 {
   675  		v.reset(OpInvalid)
   676  	}
   677  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   678  	return true
   679  }
   680  
   681  // noteRule is an easy way to track if a rule is matched when writing
   682  // new ones.  Make the rule of interest also conditional on
   683  //     noteRule("note to self: rule of interest matched")
   684  // and that message will print when the rule matches.
   685  func noteRule(s string) bool {
   686  	fmt.Println(s)
   687  	return true
   688  }
   689  
   690  // countRule increments Func.ruleMatches[key].
   691  // If Func.ruleMatches is non-nil at the end
   692  // of compilation, it will be printed to stdout.
    693  // This is intended to make it easier to find functions that
    694  // contain many rule matches when developing new rules.
   695  func countRule(v *Value, key string) bool {
   696  	f := v.Block.Func
   697  	if f.ruleMatches == nil {
   698  		f.ruleMatches = make(map[string]int)
   699  	}
   700  	f.ruleMatches[key]++
   701  	return true
   702  }
   703  
   704  // warnRule generates compiler debug output with string s when
   705  // v is not in autogenerated code, cond is true and the rule has fired.
   706  func warnRule(cond bool, v *Value, s string) bool {
   707  	if pos := v.Pos; pos.Line() > 1 && cond {
   708  		v.Block.Func.Warnl(pos, s)
   709  	}
   710  	return true
   711  }
   712  
    713  // flagArg extracts x from a pseudo-op like (LessThan x).
   714  func flagArg(v *Value) *Value {
   715  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
   716  		return nil
   717  	}
   718  	return v.Args[0]
   719  }
   720  
   721  // arm64Negate finds the complement to an ARM64 condition code,
   722  // for example Equal -> NotEqual or LessThan -> GreaterEqual
   725  func arm64Negate(op Op) Op {
   726  	switch op {
   727  	case OpARM64LessThan:
   728  		return OpARM64GreaterEqual
   729  	case OpARM64LessThanU:
   730  		return OpARM64GreaterEqualU
   731  	case OpARM64GreaterThan:
   732  		return OpARM64LessEqual
   733  	case OpARM64GreaterThanU:
   734  		return OpARM64LessEqualU
   735  	case OpARM64LessEqual:
   736  		return OpARM64GreaterThan
   737  	case OpARM64LessEqualU:
   738  		return OpARM64GreaterThanU
   739  	case OpARM64GreaterEqual:
   740  		return OpARM64LessThan
   741  	case OpARM64GreaterEqualU:
   742  		return OpARM64LessThanU
   743  	case OpARM64Equal:
   744  		return OpARM64NotEqual
   745  	case OpARM64NotEqual:
   746  		return OpARM64Equal
   747  	case OpARM64LessThanF:
   748  		return OpARM64GreaterEqualF
   749  	case OpARM64GreaterThanF:
   750  		return OpARM64LessEqualF
   751  	case OpARM64LessEqualF:
   752  		return OpARM64GreaterThanF
   753  	case OpARM64GreaterEqualF:
   754  		return OpARM64LessThanF
   755  	default:
   756  		panic("unreachable")
   757  	}
   758  }
   759  
   760  // arm64Invert evaluates (InvertFlags op), which
   761  // is the same as altering the condition codes such
   762  // that the same result would be produced if the arguments
   763  // to the flag-generating instruction were reversed, e.g.
   764  // (InvertFlags (CMP x y)) -> (CMP y x)
   767  func arm64Invert(op Op) Op {
   768  	switch op {
   769  	case OpARM64LessThan:
   770  		return OpARM64GreaterThan
   771  	case OpARM64LessThanU:
   772  		return OpARM64GreaterThanU
   773  	case OpARM64GreaterThan:
   774  		return OpARM64LessThan
   775  	case OpARM64GreaterThanU:
   776  		return OpARM64LessThanU
   777  	case OpARM64LessEqual:
   778  		return OpARM64GreaterEqual
   779  	case OpARM64LessEqualU:
   780  		return OpARM64GreaterEqualU
   781  	case OpARM64GreaterEqual:
   782  		return OpARM64LessEqual
   783  	case OpARM64GreaterEqualU:
   784  		return OpARM64LessEqualU
   785  	case OpARM64Equal, OpARM64NotEqual:
   786  		return op
   787  	case OpARM64LessThanF:
   788  		return OpARM64GreaterThanF
   789  	case OpARM64GreaterThanF:
   790  		return OpARM64LessThanF
   791  	case OpARM64LessEqualF:
   792  		return OpARM64GreaterEqualF
   793  	case OpARM64GreaterEqualF:
   794  		return OpARM64LessEqualF
   795  	default:
   796  		panic("unreachable")
   797  	}
   798  }
   799  
    800  // ccARM64Eval evaluates an ARM64 condition code op against a flags value
    801  // that is potentially constant; it returns 1 for true,
    802  // -1 for false, and 0 for not constant.
   803  func ccARM64Eval(cc interface{}, flags *Value) int {
   804  	op := cc.(Op)
   805  	fop := flags.Op
   806  	switch fop {
   807  	case OpARM64InvertFlags:
   808  		return -ccARM64Eval(op, flags.Args[0])
   809  	case OpARM64FlagEQ:
   810  		switch op {
   811  		case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
   812  			OpARM64GreaterEqualU, OpARM64LessEqualU:
   813  			return 1
   814  		default:
   815  			return -1
   816  		}
   817  	case OpARM64FlagLT_ULT:
   818  		switch op {
   819  		case OpARM64LessThan, OpARM64LessThanU,
   820  			OpARM64LessEqual, OpARM64LessEqualU:
   821  			return 1
   822  		default:
   823  			return -1
   824  		}
   825  	case OpARM64FlagLT_UGT:
   826  		switch op {
   827  		case OpARM64LessThan, OpARM64GreaterThanU,
   828  			OpARM64LessEqual, OpARM64GreaterEqualU:
   829  			return 1
   830  		default:
   831  			return -1
   832  		}
   833  	case OpARM64FlagGT_ULT:
   834  		switch op {
   835  		case OpARM64GreaterThan, OpARM64LessThanU,
   836  			OpARM64GreaterEqual, OpARM64LessEqualU:
   837  			return 1
   838  		default:
   839  			return -1
   840  		}
   841  	case OpARM64FlagGT_UGT:
   842  		switch op {
   843  		case OpARM64GreaterThan, OpARM64GreaterThanU,
   844  			OpARM64GreaterEqual, OpARM64GreaterEqualU:
   845  			return 1
   846  		default:
   847  			return -1
   848  		}
   849  	default:
   850  		return 0
   851  	}
   852  }
   853  
   854  // logRule logs the use of the rule s. This will only be enabled if
   855  // rewrite rules were generated with the -log option, see gen/rulegen.go.
   856  func logRule(s string) {
   857  	if ruleFile == nil {
   858  		// Open a log file to write log to. We open in append
   859  		// mode because all.bash runs the compiler lots of times,
   860  		// and we want the concatenation of all of those logs.
   861  		// This means, of course, that users need to rm the old log
   862  		// to get fresh data.
   863  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
   864  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
   865  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
   866  		if err != nil {
   867  			panic(err)
   868  		}
   869  		ruleFile = w
   870  	}
   871  	_, err := fmt.Fprintln(ruleFile, s)
   872  	if err != nil {
   873  		panic(err)
   874  	}
   875  }
   876  
   877  var ruleFile io.Writer
   878  
   879  func min(x, y int64) int64 {
   880  	if x < y {
   881  		return x
   882  	}
   883  	return y
   884  }
   885  
   886  func isConstZero(v *Value) bool {
   887  	switch v.Op {
   888  	case OpConstNil:
   889  		return true
   890  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
   891  		return v.AuxInt == 0
   892  	}
   893  	return false
   894  }
   895  
   896  // reciprocalExact64 reports whether 1/c is exactly representable.
   897  func reciprocalExact64(c float64) bool {
   898  	b := math.Float64bits(c)
   899  	man := b & (1<<52 - 1)
   900  	if man != 0 {
   901  		return false // not a power of 2, denormal, or NaN
   902  	}
   903  	exp := b >> 52 & (1<<11 - 1)
   904  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
   905  	// changes the exponent to 0x7fe-exp.
   906  	switch exp {
   907  	case 0:
   908  		return false // ±0
   909  	case 0x7ff:
   910  		return false // ±inf
   911  	case 0x7fe:
   912  		return false // exponent is not representable
   913  	default:
   914  		return true
   915  	}
   916  }
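         // For example, 1/2 is exact: man == 0, exp == 0x400, and the
         // reciprocal's exponent 0x7fe-0x400 is an ordinary one. But for
         // c == 2^1023 (exp == 0x7fe), the reciprocal's exponent would be 0,
         // i.e. a denormal, so it is rejected.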
   917  
   918  // reciprocalExact32 reports whether 1/c is exactly representable.
   919  func reciprocalExact32(c float32) bool {
   920  	b := math.Float32bits(c)
   921  	man := b & (1<<23 - 1)
   922  	if man != 0 {
   923  		return false // not a power of 2, denormal, or NaN
   924  	}
   925  	exp := b >> 23 & (1<<8 - 1)
   926  	// exponent bias is 0x7f.  So taking the reciprocal of a number
   927  	// changes the exponent to 0xfe-exp.
   928  	switch exp {
   929  	case 0:
   930  		return false // ±0
   931  	case 0xff:
   932  		return false // ±inf
   933  	case 0xfe:
   934  		return false // exponent is not representable
   935  	default:
   936  		return true
   937  	}
   938  }
   939  
    940  // isARMImmRot checks whether an immediate can be directly encoded into an ARM instruction.
   941  func isARMImmRot(v uint32) bool {
   942  	for i := 0; i < 16; i++ {
   943  		if v&^0xff == 0 {
   944  			return true
   945  		}
   946  		v = v<<2 | v>>30
   947  	}
   948  
   949  	return false
   950  }
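         // For example, 0xff000000 is encodable: four of the double-bit
         // rotations above bring it down to 0xff. 0x101 is not: its two set
         // bits stay 8 apart under any rotation, so they never both fit in
         // the low 8 bits.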
   951  
    952  // overlap reports whether the ranges specified by the given offset and
    953  // size pairs overlap.
   954  func overlap(offset1, size1, offset2, size2 int64) bool {
   955  	if offset1 >= offset2 && offset2+size2 > offset1 {
   956  		return true
   957  	}
   958  	if offset2 >= offset1 && offset1+size1 > offset2 {
   959  		return true
   960  	}
   961  	return false
   962  }
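         // For example, overlap(0, 8, 4, 4) is true (bytes 4 through 7 are
         // shared), while overlap(0, 4, 4, 4) is false (the ranges only touch).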
   963  
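         // areAdjacentOffsets reports whether the two size-byte regions at
         // off1 and off2 are adjacent, i.e. one starts exactly where the
         // other ends.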
   964  func areAdjacentOffsets(off1, off2, size int64) bool {
   965  	return off1+size == off2 || off1 == off2+size
   966  }
   967  
    968  // zeroUpper32Bits checks whether x zeroes out the upper 32 bits of a 64-bit register.
    969  // depth limits the recursion depth. AMD64.rules uses 3 as the limit,
    970  // because it catches the same number of cases as 4.
   971  func zeroUpper32Bits(x *Value, depth int) bool {
   972  	switch x.Op {
   973  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
   974  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
   975  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
   976  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
   977  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
   978  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
   979  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
   980  		return true
   981  	case OpArg:
   982  		return x.Type.Width == 4
   983  	case OpPhi, OpSelect0, OpSelect1:
    984  		// Phis can use each other as arguments; instead of tracking
    985  		// visited values, just limit the recursion depth.
   986  		if depth <= 0 {
   987  			return false
   988  		}
   989  		for i := range x.Args {
   990  			if !zeroUpper32Bits(x.Args[i], depth-1) {
   991  				return false
   992  			}
   993  		}
   994  		return true
   995  
   996  	}
   997  	return false
   998  }
   999  
  1000  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
  1001  func zeroUpper48Bits(x *Value, depth int) bool {
  1002  	switch x.Op {
  1003  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1004  		return true
  1005  	case OpArg:
  1006  		return x.Type.Width == 2
  1007  	case OpPhi, OpSelect0, OpSelect1:
   1008  		// Phis can use each other as arguments; instead of tracking
   1009  		// visited values, just limit the recursion depth.
  1010  		if depth <= 0 {
  1011  			return false
  1012  		}
  1013  		for i := range x.Args {
  1014  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1015  				return false
  1016  			}
  1017  		}
  1018  		return true
  1019  
  1020  	}
  1021  	return false
  1022  }
  1023  
  1024  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
  1025  func zeroUpper56Bits(x *Value, depth int) bool {
  1026  	switch x.Op {
  1027  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1028  		return true
  1029  	case OpArg:
  1030  		return x.Type.Width == 1
  1031  	case OpPhi, OpSelect0, OpSelect1:
   1032  		// Phis can use each other as arguments; instead of tracking
   1033  		// visited values, just limit the recursion depth.
  1034  		if depth <= 0 {
  1035  			return false
  1036  		}
  1037  		for i := range x.Args {
  1038  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1039  				return false
  1040  			}
  1041  		}
  1042  		return true
  1043  
  1044  	}
  1045  	return false
  1046  }
  1047  
  1048  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1049  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1050  // safe, either because Move is small or because the arguments are disjoint.
  1051  // This is used as a check for replacing memmove with Move ops.
  1052  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1053  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1054  	// Move ops may or may not be faster for large sizes depending on how the platform
  1055  	// lowers them, so we only perform this optimization on platforms that we know to
  1056  	// have fast Move ops.
  1057  	switch c.arch {
  1058  	case "amd64", "amd64p32":
  1059  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1060  	case "386", "ppc64", "ppc64le", "arm64":
  1061  		return sz <= 8
  1062  	case "s390x":
  1063  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1064  	case "arm", "mips", "mips64", "mipsle", "mips64le":
  1065  		return sz <= 4
  1066  	}
  1067  	return false
  1068  }
  1069  
  1070  // hasSmallRotate reports whether the architecture has rotate instructions
  1071  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1072  func hasSmallRotate(c *Config) bool {
  1073  	switch c.arch {
  1074  	case "amd64", "amd64p32", "386":
  1075  		return true
  1076  	default:
  1077  		return false
  1078  	}
  1079  }
  1080  
   1081  // armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1082  func armBFAuxInt(lsb, width int64) int64 {
  1083  	if lsb < 0 || lsb > 63 {
  1084  		panic("ARM(64) bit field lsb constant out of range")
  1085  	}
  1086  	if width < 1 || width > 64 {
  1087  		panic("ARM(64) bit field width constant out of range")
  1088  	}
  1089  	return width | lsb<<8
  1090  }
  1091  
   1092  // getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
  1093  func getARM64BFlsb(bfc int64) int64 {
  1094  	return int64(uint64(bfc) >> 8)
  1095  }
  1096  
   1097  // getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
  1098  func getARM64BFwidth(bfc int64) int64 {
  1099  	return bfc & 0xff
  1100  }
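         // For example, armBFAuxInt(8, 16) == 16 | 8<<8 == 0x810, which decodes
         // back via getARM64BFlsb(0x810) == 8 and getARM64BFwidth(0x810) == 16.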
  1101  
   1102  // isARM64BFMask reports whether mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1103  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1104  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1105  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1106  }
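         // For example, isARM64BFMask(4, 0xff0, 4) is true: the shifted mask
         // is 0xff, 0xff+1 is a power of two, and nto(0xff)+4 == 12 < 64.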
  1107  
   1108  // arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1109  func arm64BFWidth(mask, rshift int64) int64 {
  1110  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1111  	if shiftedMask == 0 {
  1112  		panic("ARM64 BF mask is zero")
  1113  	}
  1114  	return nto(shiftedMask)
  1115  }
  1116  
  1117  // sizeof returns the size of t in bytes.
  1118  // It will panic if t is not a *types.Type.
  1119  func sizeof(t interface{}) int64 {
  1120  	return t.(*types.Type).Size()
  1121  }
  1122  
  1123  // alignof returns the alignment of t in bytes.
  1124  // It will panic if t is not a *types.Type.
  1125  func alignof(t interface{}) int64 {
  1126  	return t.(*types.Type).Alignment()
  1127  }
  1128  
  1129  // registerizable reports whether t is a primitive type that fits in
  1130  // a register. It assumes float64 values will always fit into registers
  1131  // even if that isn't strictly true.
  1132  // It will panic if t is not a *types.Type.
  1133  func registerizable(b *Block, t interface{}) bool {
  1134  	typ := t.(*types.Type)
  1135  	if typ.IsPtrShaped() || typ.IsFloat() {
  1136  		return true
  1137  	}
  1138  	if typ.IsInteger() {
  1139  		return typ.Size() <= b.Func.Config.RegSize
  1140  	}
  1141  	return false
  1142  }
  1143  
   1144  // needRaceCleanup reports whether this call to racefuncenter/racefuncexit can be removed.
  1145  func needRaceCleanup(sym interface{}, v *Value) bool {
  1146  	f := v.Block.Func
  1147  	if !f.Config.Race {
  1148  		return false
  1149  	}
  1150  	if !isSameSym(sym, "runtime.racefuncenter") && !isSameSym(sym, "runtime.racefuncexit") {
  1151  		return false
  1152  	}
  1153  	for _, b := range f.Blocks {
  1154  		for _, v := range b.Values {
  1155  			switch v.Op {
  1156  			case OpStaticCall:
   1157  				// The scan will encounter racefuncenter and racefuncexit themselves;
   1158  				// allow them, along with calls to panic*.
  1159  				s := v.Aux.(fmt.Stringer).String()
  1160  				switch s {
  1161  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1162  					"runtime.panicdivide", "runtime.panicwrap",
  1163  					"runtime.panicshift":
  1164  					continue
  1165  				}
  1166  				// If we encountered any call, we need to keep racefunc*,
  1167  				// for accurate stacktraces.
  1168  				return false
  1169  			case OpPanicBounds, OpPanicExtend:
  1170  				// Note: these are panic generators that are ok (like the static calls above).
  1171  			case OpClosureCall, OpInterCall:
  1172  				// We must keep the race functions if there are any other call types.
  1173  				return false
  1174  			}
  1175  		}
  1176  	}
  1177  	return true
  1178  }
  1179  
  1180  // symIsRO reports whether sym is a read-only global.
  1181  func symIsRO(sym interface{}) bool {
  1182  	lsym := sym.(*obj.LSym)
  1183  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1184  }
  1185  
  1186  // read8 reads one byte from the read-only global sym at offset off.
  1187  func read8(sym interface{}, off int64) uint8 {
  1188  	lsym := sym.(*obj.LSym)
  1189  	if off >= int64(len(lsym.P)) || off < 0 {
  1190  		// Invalid index into the global sym.
  1191  		// This can happen in dead code, so we don't want to panic.
  1192  		// Just return any value, it will eventually get ignored.
  1193  		// See issue 29215.
  1194  		return 0
  1195  	}
  1196  	return lsym.P[off]
  1197  }
  1198  
  1199  // read16 reads two bytes from the read-only global sym at offset off.
  1200  func read16(sym interface{}, off int64, bigEndian bool) uint16 {
  1201  	lsym := sym.(*obj.LSym)
  1202  	if off >= int64(len(lsym.P))-1 || off < 0 {
  1203  		return 0
  1204  	}
  1205  	if bigEndian {
  1206  		return binary.BigEndian.Uint16(lsym.P[off:])
  1207  	} else {
  1208  		return binary.LittleEndian.Uint16(lsym.P[off:])
  1209  	}
  1210  }
  1211  
  1212  // read32 reads four bytes from the read-only global sym at offset off.
  1213  func read32(sym interface{}, off int64, bigEndian bool) uint32 {
  1214  	lsym := sym.(*obj.LSym)
  1215  	if off >= int64(len(lsym.P))-3 || off < 0 {
  1216  		return 0
  1217  	}
  1218  	if bigEndian {
  1219  		return binary.BigEndian.Uint32(lsym.P[off:])
  1220  	} else {
  1221  		return binary.LittleEndian.Uint32(lsym.P[off:])
  1222  	}
  1223  }
  1224  
  1225  // read64 reads eight bytes from the read-only global sym at offset off.
  1226  func read64(sym interface{}, off int64, bigEndian bool) uint64 {
  1227  	lsym := sym.(*obj.LSym)
  1228  	if off >= int64(len(lsym.P))-7 || off < 0 {
  1229  		return 0
  1230  	}
  1231  	if bigEndian {
  1232  		return binary.BigEndian.Uint64(lsym.P[off:])
  1233  	} else {
  1234  		return binary.LittleEndian.Uint64(lsym.P[off:])
  1235  	}
  1236  }
  1237  
