Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/compile/internal/ssa/expand_calls.go

Documentation: cmd/compile/internal/ssa

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"fmt"
    11  	"sort"
    12  )
    13  
// selKey identifies one decomposed selection: a particular (offset, size, type)
// slice taken out of a source value.  It is the key of the "common" map in
// expandCalls, used to memoize decomposed OpArg values so the same selection
// is not materialized more than once.
type selKey struct {
	from   *Value      // the value being selected from
	offset int64       // byte offset of the selection within from
	size   int64       // size in bytes of the selected piece
	typ    *types.Type // type of the selected piece
}
    20  
// offsetKey identifies an offset-from-pointer computation (base value, byte
// offset, resulting pointer type).  It is the key of the "offsets" map in
// expandCalls, used to reuse an existing OpOffPtr instead of creating a
// duplicate.
type offsetKey struct {
	from   *Value      // base pointer
	offset int64       // byte offset from the base
	pt     *types.Type // pointer type of the result
}
    26  
    27  // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
    28  // that is more oriented to a platform's ABI.  The SelectN operations that extract results are rewritten into
    29  // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
    30  // reached.  On the callee side, OpArg nodes are not decomposed until this phase is run.
    31  // TODO results should not be lowered until this phase.
    32  func expandCalls(f *Func) {
    33  	// Calls that need lowering have some number of inputs, including a memory input,
    34  	// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
    35  
    36  	// With the current ABI those inputs need to be converted into stores to memory,
    37  	// rethreading the call's memory input to the first, and the new call now receiving the last.
    38  
    39  	// With the current ABI, the outputs need to be converted to loads, which will all use the call's
    40  	// memory output as their input.
    41  	if !LateCallExpansionEnabledWithin(f) {
    42  		return
    43  	}
    44  	debug := f.pass.debug > 0
    45  
    46  	if debug {
    47  		fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
    48  	}
    49  
    50  	canSSAType := f.fe.CanSSA
    51  	regSize := f.Config.RegSize
    52  	sp, _ := f.spSb()
    53  	typ := &f.Config.Types
    54  	ptrSize := f.Config.PtrSize
    55  
    56  	// For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness.
    57  	var hiOffset, lowOffset int64
    58  	if f.Config.BigEndian {
    59  		lowOffset = 4
    60  	} else {
    61  		hiOffset = 4
    62  	}
    63  
    64  	namedSelects := make(map[*Value][]namedVal)
    65  
    66  	sdom := f.Sdom()
    67  
    68  	common := make(map[selKey]*Value)
    69  
    70  	// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
    71  	// that has no 64-bit integer registers.
    72  	intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
    73  		tHi = typ.UInt32
    74  		if et == types.TINT64 {
    75  			tHi = typ.Int32
    76  		}
    77  		tLo = typ.UInt32
    78  		return
    79  	}
    80  
    81  	// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
    82  	// that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin,
    83  	// so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit
    84  	// integer on 32-bit).
    85  	isAlreadyExpandedAggregateType := func(t *types.Type) bool {
    86  		if !canSSAType(t) {
    87  			return false
    88  		}
    89  		return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() ||
    90  			t.Size() > regSize && t.IsInteger()
    91  	}
    92  
    93  	offsets := make(map[offsetKey]*Value)
    94  
    95  	// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
    96  	// TODO should also optimize offsets from SB?
    97  	offsetFrom := func(from *Value, offset int64, pt *types.Type) *Value {
    98  		if offset == 0 && from.Type == pt { // this is not actually likely
    99  			return from
   100  		}
   101  		// Simplify, canonicalize
   102  		for from.Op == OpOffPtr {
   103  			offset += from.AuxInt
   104  			from = from.Args[0]
   105  		}
   106  		if from == sp {
   107  			return f.ConstOffPtrSP(pt, offset, sp)
   108  		}
   109  		key := offsetKey{from, offset, pt}
   110  		v := offsets[key]
   111  		if v != nil {
   112  			return v
   113  		}
   114  		v = from.Block.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
   115  		offsets[key] = v
   116  		return v
   117  	}
   118  
   119  	// splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates.
   120  	splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
   121  		var locs []LocalSlot
   122  		for i := range ls {
   123  			locs = append(locs, f.fe.SplitSlot(&ls[i], sfx, offset, ty))
   124  		}
   125  		return locs
   126  	}
   127  
   128  	// removeTrivialWrapperTypes unwraps layers of
   129  	// struct { singleField SomeType } and [1]SomeType
   130  	// until a non-wrapper type is reached.  This is useful
   131  	// for working with assignments to/from interface data
   132  	// fields (either second operand to OpIMake or OpIData)
   133  	// where the wrapping or type conversion can be elided
   134  	// because of type conversions/assertions in source code
   135  	// that do not appear in SSA.
   136  	removeTrivialWrapperTypes := func(t *types.Type) *types.Type {
   137  		for {
   138  			if t.IsStruct() && t.NumFields() == 1 {
   139  				t = t.Field(0).Type
   140  				continue
   141  			}
   142  			if t.IsArray() && t.NumElem() == 1 {
   143  				t = t.Elem()
   144  				continue
   145  			}
   146  			break
   147  		}
   148  		return t
   149  	}
   150  
	// Calls that need lowering have some number of inputs, including a memory input,
	// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.

	// With the current ABI those inputs need to be converted into stores to memory,
	// rethreading the call's memory input to the first, and the new call now receiving the last.

	// With the current ABI, the outputs need to be converted to loads, which will all use the call's
	// memory output as their input.

	// rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg)
	// through a chain of Struct/Array/builtin Select operations.  If the chain of selectors does not
	// end in an expected root, it does nothing (this can happen depending on compiler phase ordering).
	// The "leaf" provides the type, the root supplies the container, and the leaf-to-root path
	// accumulates the offset.
	// It emits the code necessary to implement the leaf select operation that leads to the root.
	// It returns the LocalSlots (if any) that name the rewritten value.
	//
	// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
	var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
	rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
		if debug {
			fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
		}
		var locs []LocalSlot
		leafType := leaf.Type
		if len(selector.Args) > 0 {
			// Short-circuit any chain of OpCopy feeding the selector so the
			// cases below see the real operand.
			w := selector.Args[0]
			if w.Op == OpCopy {
				for w.Op == OpCopy {
					w = w.Args[0]
				}
				selector.SetArg(0, w)
			}
		}
		switch selector.Op {
		case OpArg:
			// Root case: the chain bottoms out at an incoming argument.
			if !isAlreadyExpandedAggregateType(selector.Type) {
				if leafType == selector.Type { // OpIData leads us here, sometimes.
					leaf.copyOf(selector)
				} else {
					f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
				}
				if debug {
					fmt.Printf("\tOpArg, break\n")
				}
				break
			}
			switch leaf.Op {
			case OpIData, OpStructSelect, OpArraySelect:
				leafType = removeTrivialWrapperTypes(leaf.Type)
			}
			// Rewrite the leaf as a narrower OpArg at the accumulated offset.
			aux := selector.Aux
			auxInt := selector.AuxInt + offset
			if leaf.Block == selector.Block {
				leaf.reset(OpArg)
				leaf.Aux = aux
				leaf.AuxInt = auxInt
				leaf.Type = leafType
			} else {
				// Cannot rewrite in place across blocks; create the OpArg in
				// the selector's block and make the leaf a copy of it.
				w := selector.Block.NewValue0IA(leaf.Pos, OpArg, leafType, auxInt, aux)
				leaf.copyOf(w)
				if debug {
					fmt.Printf("\tnew %s\n", w.LongString())
				}
			}
			for _, s := range namedSelects[selector] {
				locs = append(locs, f.Names[s.locIndex])
			}

		case OpLoad: // We end up here because of IData of immediate structures.
			// Failure case:
			// (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as
			// the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat).
			//
			// GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc
			// cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR
			// b2: ← b1
			// v20 (+142) = StaticLECall <interface {},mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1
			// v21 (142) = SelectN <mem> [1] v20
			// v22 (142) = SelectN <interface {}> [0] v20
			// b15: ← b8
			// v71 (+143) = IData <Nodes> v22 (v[Nodes])
			// v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21
			//
			// translates (w/o the "case OpLoad:" above) to:
			//
			// b2: ← b1
			// v20 (+142) = StaticCall <mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715
			// v23 (142) = Load <*uintptr> v19 v20
			// v823 (142) = IsNonNil <bool> v23
			// v67 (+143) = Load <*[]*Node> v880 v20
			// b15: ← b8
			// v827 (146) = StructSelect <*[]*Node> [0] v67
			// v846 (146) = Store <mem> {*[]*Node} v769 v827 v20
			// v73 (+146) = StaticCall <mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846
			// i.e., the struct select is generated and remains in because it is not applied to an actual structure.
			// The OpLoad was created to load the single field of the IData
			// This case removes that StructSelect.
			if leafType != selector.Type {
				f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString())
			}
			leaf.copyOf(selector)
			for _, s := range namedSelects[selector] {
				locs = append(locs, f.Names[s.locIndex])
			}

		case OpSelectN:
			// Root case: the chain bottoms out at a result of an LE call.
			// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
			call := selector.Args[0]
			aux := call.Aux.(*AuxCall)
			which := selector.AuxInt
			if which == aux.NResults() { // mem is after the results.
				// rewrite v as a Copy of call -- the replacement call will produce a mem.
				leaf.copyOf(call)
			} else {
				leafType := removeTrivialWrapperTypes(leaf.Type)
				if canSSAType(leafType) {
					pt := types.NewPtr(leafType)
					// Compute the address of this result in the call's result area.
					off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
					// Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
					if leaf.Block == call.Block {
						leaf.reset(OpLoad)
						leaf.SetArgs2(off, call)
						leaf.Type = leafType
					} else {
						w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
						leaf.copyOf(w)
						if debug {
							fmt.Printf("\tnew %s\n", w.LongString())
						}
					}
					for _, s := range namedSelects[selector] {
						locs = append(locs, f.Names[s.locIndex])
					}
				} else {
					f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
				}
			}

		case OpStructSelect:
			// Intermediate selector: recurse toward the root, adding the field offset.
			w := selector.Args[0]
			var ls []LocalSlot
			if w.Type.Etype != types.TSTRUCT { // IData artifact
				ls = rewriteSelect(leaf, w, offset)
			} else {
				ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
				if w.Op != OpIData {
					for _, l := range ls {
						locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
					}
				}
			}

		case OpArraySelect:
			// Intermediate selector: element i lives at i*elemSize from the array base.
			w := selector.Args[0]
			rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)

		case OpInt64Hi:
			w := selector.Args[0]
			ls := rewriteSelect(leaf, w, offset+hiOffset)
			locs = splitSlots(ls, ".hi", hiOffset, leafType)

		case OpInt64Lo:
			w := selector.Args[0]
			ls := rewriteSelect(leaf, w, offset+lowOffset)
			locs = splitSlots(ls, ".lo", lowOffset, leafType)

		case OpStringPtr:
			ls := rewriteSelect(leaf, selector.Args[0], offset)
			locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)

		case OpSlicePtr:
			w := selector.Args[0]
			ls := rewriteSelect(leaf, w, offset)
			locs = splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem()))

		case OpITab:
			w := selector.Args[0]
			ls := rewriteSelect(leaf, w, offset)
			// An empty interface's first word is a type pointer, not an itab.
			sfx := ".itab"
			if w.Type.IsEmptyInterface() {
				sfx = ".type"
			}
			locs = splitSlots(ls, sfx, 0, typ.Uintptr)

		case OpComplexReal:
			ls := rewriteSelect(leaf, selector.Args[0], offset)
			locs = splitSlots(ls, ".real", 0, leafType)

		case OpComplexImag:
			ls := rewriteSelect(leaf, selector.Args[0], offset+leafType.Width) // result is FloatNN, width of result is offset of imaginary part.
			locs = splitSlots(ls, ".imag", leafType.Width, leafType)

		case OpStringLen, OpSliceLen:
			ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
			locs = splitSlots(ls, ".len", ptrSize, leafType)

		case OpIData:
			ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
			locs = splitSlots(ls, ".data", ptrSize, leafType)

		case OpSliceCap:
			ls := rewriteSelect(leaf, selector.Args[0], offset+2*ptrSize)
			locs = splitSlots(ls, ".cap", 2*ptrSize, leafType)

		case OpCopy: // If it's an intermediate result, recurse
			locs = rewriteSelect(leaf, selector.Args[0], offset)
			for _, s := range namedSelects[selector] {
				// this copy may have had its own name, preserve that, too.
				locs = append(locs, f.Names[s.locIndex])
			}

		default:
			// Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed).
		}

		return locs
	}
   368  
	// storeArgOrLoad converts stores of SSA-able aggregate arguments (passed to a call) into a series of primitive-typed
	// stores of non-aggregate types.  It recursively walks up a chain of selectors until it reaches a Load or an Arg.
	// If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
	// (Declared here, assigned below, because it is mutually recursive with the storeOne*/storeTwo* helpers.)
	var storeArgOrLoad func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value

	// decomposeArgOrLoad is a helper for storeArgOrLoad.
	// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
	// passed to it, and returns the new mem. If the type does not match one of the expected aggregate types, it returns nil instead.
	// offArg positions are relative to source (source.AuxInt is its base offset); offStore positions are relative to base.
	decomposeArgOrLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64,
		decomposeOne func(pos src.XPos, b *Block, base, source, mem *Value, t1 *types.Type, offArg, offStore int64) *Value,
		decomposeTwo func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value) *Value {
		u := source.Type
		switch u.Etype {
		case types.TARRAY:
			// One piece per element.
			elem := u.Elem()
			for i := int64(0); i < u.NumElem(); i++ {
				elemOff := i * elem.Size()
				mem = decomposeOne(pos, b, base, source, mem, elem, source.AuxInt+elemOff, offset+elemOff)
				pos = pos.WithNotStmt()
			}
			return mem
		case types.TSTRUCT:
			// One piece per field, at each field's offset.
			for i := 0; i < u.NumFields(); i++ {
				fld := u.Field(i)
				mem = decomposeOne(pos, b, base, source, mem, fld.Type, source.AuxInt+fld.Offset, offset+fld.Offset)
				pos = pos.WithNotStmt()
			}
			return mem
		case types.TINT64, types.TUINT64:
			// Only split when the integer is wider than a register (32-bit targets).
			if t.Width == regSize {
				break
			}
			tHi, tLo := intPairTypes(t.Etype)
			mem = decomposeOne(pos, b, base, source, mem, tHi, source.AuxInt+hiOffset, offset+hiOffset)
			pos = pos.WithNotStmt()
			return decomposeOne(pos, b, base, source, mem, tLo, source.AuxInt+lowOffset, offset+lowOffset)
		case types.TINTER:
			// (itab/type word, data word)
			return decomposeTwo(pos, b, base, source, mem, typ.Uintptr, typ.BytePtr, source.AuxInt, offset)
		case types.TSTRING:
			// (data pointer, length)
			return decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
		case types.TCOMPLEX64:
			return decomposeTwo(pos, b, base, source, mem, typ.Float32, typ.Float32, source.AuxInt, offset)
		case types.TCOMPLEX128:
			return decomposeTwo(pos, b, base, source, mem, typ.Float64, typ.Float64, source.AuxInt, offset)
		case types.TSLICE:
			// (data pointer, length) then capacity at offset 2*ptrSize.
			mem = decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
			return decomposeOne(pos, b, base, source, mem, typ.Int, source.AuxInt+2*ptrSize, offset+2*ptrSize)
		}
		// Not an aggregate this helper knows how to split; caller falls through.
		return nil
	}
   419  
   420  	// storeOneArg creates a decomposed (one step) arg that is then stored.
   421  	// pos and b locate the store instruction, base is the base of the store target, source is the "base" of the value input,
   422  	// mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases.
   423  	storeOneArg := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
   424  		w := common[selKey{source, offArg, t.Width, t}]
   425  		if w == nil {
   426  			w = source.Block.NewValue0IA(source.Pos, OpArg, t, offArg, source.Aux)
   427  			common[selKey{source, offArg, t.Width, t}] = w
   428  		}
   429  		return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
   430  	}
   431  
   432  	// storeOneLoad creates a decomposed (one step) load that is then stored.
   433  	storeOneLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
   434  		from := offsetFrom(source.Args[0], offArg, types.NewPtr(t))
   435  		w := source.Block.NewValue2(source.Pos, OpLoad, t, from, mem)
   436  		return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
   437  	}
   438  
   439  	storeTwoArg := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
   440  		mem = storeOneArg(pos, b, base, source, mem, t1, offArg, offStore)
   441  		pos = pos.WithNotStmt()
   442  		t1Size := t1.Size()
   443  		return storeOneArg(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
   444  	}
   445  
   446  	storeTwoLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
   447  		mem = storeOneLoad(pos, b, base, source, mem, t1, offArg, offStore)
   448  		pos = pos.WithNotStmt()
   449  		t1Size := t1.Size()
   450  		return storeOneLoad(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
   451  	}
   452  
	// storeArgOrLoad stores source (of type t) at base+offset, decomposing
	// aggregates along the way.  First it tries to take the source apart by
	// its Op; failing that, it takes the type apart by inserting selector
	// ops; and if neither applies, it emits a plain store.  Returns the new mem.
	storeArgOrLoad = func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value {
		if debug {
			fmt.Printf("\tstoreArgOrLoad(%s;  %s;  %s;  %s; %d)\n", base.LongString(), source.LongString(), mem.String(), t.String(), offset)
		}

		// First, decompose by the source's own structure when possible.
		switch source.Op {
		case OpCopy:
			return storeArgOrLoad(pos, b, base, source.Args[0], mem, t, offset)

		case OpLoad:
			ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneLoad, storeTwoLoad)
			if ret != nil {
				return ret
			}

		case OpArg:
			ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneArg, storeTwoArg)
			if ret != nil {
				return ret
			}

		case OpArrayMake0, OpStructMake0:
			// Zero-size aggregates store nothing.
			return mem

		case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
			// Store each field operand at its field offset.
			for i := 0; i < t.NumFields(); i++ {
				fld := t.Field(i)
				mem = storeArgOrLoad(pos, b, base, source.Args[i], mem, fld.Type, offset+fld.Offset)
				pos = pos.WithNotStmt()
			}
			return mem

		case OpArrayMake1:
			return storeArgOrLoad(pos, b, base, source.Args[0], mem, t.Elem(), offset)

		case OpInt64Make:
			tHi, tLo := intPairTypes(t.Etype)
			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tHi, offset+hiOffset)
			pos = pos.WithNotStmt()
			return storeArgOrLoad(pos, b, base, source.Args[1], mem, tLo, offset+lowOffset)

		case OpComplexMake:
			// Part type is float32 or float64 depending on the complex width.
			tPart := typ.Float32
			wPart := t.Width / 2
			if wPart == 8 {
				tPart = typ.Float64
			}
			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tPart, offset)
			pos = pos.WithNotStmt()
			return storeArgOrLoad(pos, b, base, source.Args[1], mem, tPart, offset+wPart)

		case OpIMake:
			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.Uintptr, offset)
			pos = pos.WithNotStmt()
			return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.BytePtr, offset+ptrSize)

		case OpStringMake:
			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
			pos = pos.WithNotStmt()
			return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)

		case OpSliceMake:
			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
			pos = pos.WithNotStmt()
			mem = storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
			return storeArgOrLoad(pos, b, base, source.Args[2], mem, typ.Int, offset+2*ptrSize)
		}

		// For nodes that cannot be taken apart -- OpSelectN, other structure selectors.
		// Decompose by the type instead, inserting selector ops on source.
		switch t.Etype {
		case types.TARRAY:
			elt := t.Elem()
			if source.Type != t && t.NumElem() == 1 && elt.Width == t.Width && t.Width == regSize {
				t = removeTrivialWrapperTypes(t)
				// it could be a leaf type, but the "leaf" could be complex64 (for example)
				return storeArgOrLoad(pos, b, base, source, mem, t, offset)
			}
			for i := int64(0); i < t.NumElem(); i++ {
				sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
				mem = storeArgOrLoad(pos, b, base, sel, mem, elt, offset+i*elt.Width)
				pos = pos.WithNotStmt()
			}
			return mem

		case types.TSTRUCT:
			if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
				// This peculiar test deals with accesses to immediate interface data.
				// It works okay because everything is the same size.
				// Example code that triggers this can be found in go/constant/value.go, function ToComplex
				// v119 (+881) = IData <intVal> v6
				// v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
				// This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
				// Guard against "struct{struct{*foo}}"
				// Other rewriting phases create minor glitches when they transform IData, for instance the
				// interface-typed Arg "x" of ToFloat in go/constant/value.go
				//   v6 (858) = Arg <Value> {x} (x[Value], x[Value])
				// is rewritten by decomposeArgs into
				//   v141 (858) = Arg <uintptr> {x}
				//   v139 (858) = Arg <*uint8> {x} [8]
				// because of a type case clause on line 862 of go/constant/value.go
				//  	case intVal:
				//		   return itof(x)
				// v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
				// of a *uint8, which does not succeed.
				t = removeTrivialWrapperTypes(t)
				// it could be a leaf type, but the "leaf" could be complex64 (for example)
				return storeArgOrLoad(pos, b, base, source, mem, t, offset)
			}

			for i := 0; i < t.NumFields(); i++ {
				fld := t.Field(i)
				sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
				mem = storeArgOrLoad(pos, b, base, sel, mem, fld.Type, offset+fld.Offset)
				pos = pos.WithNotStmt()
			}
			return mem

		case types.TINT64, types.TUINT64:
			// Only split when wider than a register (32-bit targets).
			if t.Width == regSize {
				break
			}
			tHi, tLo := intPairTypes(t.Etype)
			sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, tHi, offset+hiOffset)
			pos = pos.WithNotStmt()
			sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
			return storeArgOrLoad(pos, b, base, sel, mem, tLo, offset+lowOffset)

		case types.TINTER:
			sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
			pos = pos.WithNotStmt()
			sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
			return storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset+ptrSize)

		case types.TSTRING:
			sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
			pos = pos.WithNotStmt()
			sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
			return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)

		case types.TSLICE:
			et := types.NewPtr(t.Elem())
			sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, et, offset)
			pos = pos.WithNotStmt()
			sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
			sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
			return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+2*ptrSize)

		case types.TCOMPLEX64:
			sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset)
			pos = pos.WithNotStmt()
			sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
			return storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset+4)

		case types.TCOMPLEX128:
			sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset)
			pos = pos.WithNotStmt()
			sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
			return storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset+8)
		}

		// Base case: a non-aggregate (or register-sized) value; emit the store.
		dst := offsetFrom(base, offset, types.NewPtr(t))
		x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
		if debug {
			fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
		}
		return x
	}
   627  
	// rewriteArgs removes all the Args from a call and converts the call args into appropriate
	// stores (or later, register movement).  Extra args for interface and closure calls are ignored,
	// but removed.  firstArg is the index of the first value argument (after any code/context
	// operands the caller will re-attach).  Returns the rethreaded memory; the caller is
	// responsible for setting the call's new args.
	rewriteArgs := func(v *Value, firstArg int) *Value {
		// Thread the stores on the memory arg
		aux := v.Aux.(*AuxCall)
		pos := v.Pos.WithNotStmt()
		m0 := v.Args[len(v.Args)-1]
		mem := m0
		for i, a := range v.Args {
			if i < firstArg {
				continue
			}
			if a == m0 { // mem is last.
				break
			}
			// auxI indexes this argument's slot in the AuxCall's arg info.
			auxI := int64(i - firstArg)
			if a.Op == OpDereference {
				if a.MemoryArg() != m0 {
					f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString())
				}
				// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
				// TODO this will be more complicated with registers in the picture.
				source := a.Args[0]
				dst := f.ConstOffPtrSP(source.Type, aux.OffsetOfArg(auxI), sp)
				if a.Uses == 1 && a.Block == v.Block {
					// Sole use in the same block: rewrite the Dereference in place as the Move.
					a.reset(OpMove)
					a.Pos = pos
					a.Type = types.TypeMem
					a.Aux = aux.TypeOfArg(auxI)
					a.AuxInt = aux.SizeOfArg(auxI)
					a.SetArgs3(dst, source, mem)
					mem = a
				} else {
					mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, source, mem)
					mem.AuxInt = aux.SizeOfArg(auxI)
				}
			} else {
				if debug {
					fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
				}
				mem = storeArgOrLoad(pos, v.Block, sp, a, mem, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
			}
		}
		v.resetArgs()
		return mem
	}
   675  
	// TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here.

	// Step 0: rewrite the calls to convert incoming args to stores.
	// Each LE call keeps only its non-value operands (code/context) plus the
	// rethreaded memory produced by rewriteArgs.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticLECall:
				mem := rewriteArgs(v, 0)
				v.SetArgs1(mem)
			case OpClosureLECall:
				// Keep the code pointer and closure context.
				code := v.Args[0]
				context := v.Args[1]
				mem := rewriteArgs(v, 2)
				v.SetArgs3(code, context, mem)
			case OpInterLECall:
				// Keep the method code pointer.
				code := v.Args[0]
				mem := rewriteArgs(v, 1)
				v.SetArgs2(code, mem)
			}
		}
	}
   697  
	// Record, for every value that is a call result (SelectN) or an aggregate Arg
	// with a debug name, where it appears in f.Names/f.NamedValues, so the name
	// tables can be repaired (or pruned) after the rewrites below.
	for i, name := range f.Names {
		t := name.Type
		if isAlreadyExpandedAggregateType(t) {
			for j, v := range f.NamedValues[name] {
				// Note: && binds tighter than ||, so the type check applies only to OpArg;
				// OpSelectN values are recorded unconditionally here.
				if v.Op == OpSelectN || v.Op == OpArg && isAlreadyExpandedAggregateType(v.Type) {
					ns := namedSelects[v]
					namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
				}
			}
		}
	}
   709  
	// Step 1: any stores of aggregates remaining are believed to be sourced from call results or args.
	// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op == OpStore {
				t := v.Aux.(*types.Type) // declared type of the stored value
				source := v.Args[1]
				tSrc := source.Type
				iAEATt := isAlreadyExpandedAggregateType(t)

				if !iAEATt {
					// guarding against store immediate struct into interface data field -- store type is *uint8
					// TODO can this happen recursively?
					iAEATt = isAlreadyExpandedAggregateType(tSrc)
					if iAEATt {
						// Use the source's aggregate type for the decomposition.
						t = tSrc
					}
				}
				if iAEATt {
					if debug {
						fmt.Printf("Splitting store %s\n", v.LongString())
					}
					dst, mem := v.Args[0], v.Args[2]
					// Replace the aggregate store with a chain of component stores,
					// then make the original Store a copy of the final memory.
					mem = storeArgOrLoad(v.Pos, b, dst, source, mem, t, 0)
					v.copyOf(mem)
				}
			}
		}
	}
   739  
	val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.

	// Step 2: transform or accumulate selection operations for rewrite in topological order.
	//
	// Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish
	// the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures),
	//
	// Any select-for-addressing applied to call results can be transformed directly.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			// Accumulate chains of selectors for processing in topological order
			switch v.Op {
			case OpStructSelect, OpArraySelect,
				OpIData, OpITab,
				OpStringPtr, OpStringLen,
				OpSlicePtr, OpSliceLen, OpSliceCap,
				OpComplexReal, OpComplexImag,
				OpInt64Hi, OpInt64Lo:
				// This value selects from w; if w is itself a selector (or a source
				// of selections), count v as one more predecessor of w.
				w := v.Args[0]
				switch w.Op {
				case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
					val2Preds[w] += 1
					if debug {
						fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
					}
				}
				// Fall through so v itself is entered into the graph with zero
				// predecessors if nothing has referenced it yet.
				fallthrough

			case OpSelectN:
				if _, ok := val2Preds[v]; !ok {
					val2Preds[v] = 0
					if debug {
						fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
					}
				}

			case OpArg:
				// Only aggregate Args participate; scalar Args need no decomposition.
				if !isAlreadyExpandedAggregateType(v.Type) {
					continue
				}
				if _, ok := val2Preds[v]; !ok {
					val2Preds[v] = 0
					if debug {
						fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
					}
				}

			case OpSelectNAddr:
				// Do these directly, there are no chains of selectors.
				call := v.Args[0]
				which := v.AuxInt
				aux := call.Aux.(*AuxCall)
				pt := v.Type
				// Address of result #which is a fixed offset from SP.
				off := offsetFrom(sp, aux.OffsetOfResult(which), pt)
				v.copyOf(off)
			}
		}
	}
   798  
	// Step 3: Compute topological order of selectors,
	// then process it in reverse to eliminate duplicates,
	// then forwards to rewrite selectors.
	//
	// All chains of selectors end up in same block as the call.

	// Compilation must be deterministic, so sort after extracting first zeroes from map.
	// Sorting allows dominators-last order within each batch,
	// so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort).
	var toProcess []*Value
	less := func(i, j int) bool {
		vi, vj := toProcess[i], toProcess[j]
		bi, bj := vi.Block, vj.Block
		if bi == bj {
			// Same block: fall back to value ID for a deterministic order.
			return vi.ID < vj.ID
		}
		return sdom.domorder(bi) > sdom.domorder(bj) // reverse the order to put dominators last.
	}

	// Accumulate order in allOrdered
	// Seed with the values that nothing selects from (zero predecessors).
	var allOrdered []*Value
	for v, n := range val2Preds {
		if n == 0 {
			allOrdered = append(allOrdered, v)
		}
	}
	last := 0 // allOrdered[0:last] has been top-sorted and processed
	for len(val2Preds) > 0 {
		toProcess = allOrdered[last:]
		last = len(allOrdered)
		sort.SliceStable(toProcess, less)
		for _, v := range toProcess {
			// v's predecessor count has reached zero; remove it and decrement
			// the count of the value it selects from.
			delete(val2Preds, v)
			if v.Op == OpArg {
				continue // no Args[0], hence done.
			}
			w := v.Args[0]
			n, ok := val2Preds[w]
			if !ok {
				continue
			}
			if n == 1 {
				// Last reference to w: it becomes ready in the next batch.
				allOrdered = append(allOrdered, w)
				delete(val2Preds, w)
				continue
			}
			val2Preds[w] = n - 1
		}
	}
   848  
	common = make(map[selKey]*Value)
	// Rewrite duplicate selectors as copies where possible.
	// allOrdered is leaves-first, so iterating in reverse visits sources before
	// the selections made from them, in dominator-friendly order.
	for i := len(allOrdered) - 1; i >= 0; i-- {
		v := allOrdered[i]
		if v.Op == OpArg {
			continue
		}
		w := v.Args[0]
		if w.Op == OpCopy {
			// Chase copy chains so equivalent selectors share the same source key.
			for w.Op == OpCopy {
				w = w.Args[0]
			}
			v.SetArg(0, w)
		}
		typ := v.Type
		if typ.IsMemory() {
			continue // handled elsewhere, not an indexable result
		}
		size := typ.Width
		// Compute the byte offset this selector reads within its source, so two
		// selectors extracting the same bytes of the same value can be merged.
		offset := int64(0)
		switch v.Op {
		case OpStructSelect:
			if w.Type.Etype == types.TSTRUCT {
				offset = w.Type.FieldOff(int(v.AuxInt))
			} else { // Immediate interface data artifact, offset is zero.
				f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString())
			}
		case OpArraySelect:
			offset = size * v.AuxInt
		case OpSelectN:
			offset = w.Aux.(*AuxCall).OffsetOfResult(v.AuxInt)
		case OpInt64Hi:
			offset = hiOffset
		case OpInt64Lo:
			offset = lowOffset
		case OpStringLen, OpSliceLen, OpIData:
			// Second word of a string/slice/interface.
			offset = ptrSize
		case OpSliceCap:
			// Third word of a slice.
			offset = 2 * ptrSize
		case OpComplexImag:
			offset = size
		}
		sk := selKey{from: w, size: size, offset: offset, typ: typ}
		dupe := common[sk]
		if dupe == nil {
			common[sk] = v
		} else if sdom.IsAncestorEq(dupe.Block, v.Block) {
			// The previously seen selector dominates this one; reuse it.
			v.copyOf(dupe)
		} else {
			// Because values are processed in dominator order, the old common[s] will never dominate after a miss is seen.
			// Installing the new value might match some future values.
			common[sk] = v
		}
	}
   903  
	// Indices of entries in f.Names that need to be deleted.
	var toDelete []namedVal

	// Rewrite selectors.
	// allOrdered is leaves-first, so each selector's operand chain has already
	// been (or will be, recursively) rewritten by rewriteSelect.
	for i, v := range allOrdered {
		if debug {
			b := v.Block
			fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
		}
		if v.Uses == 0 {
			// Dead after deduplication; drop it.
			v.reset(OpInvalid)
			continue
		}
		if v.Op == OpCopy {
			// Already redirected to a duplicate; nothing to rewrite.
			continue
		}
		locs := rewriteSelect(v, v, 0)
		// Install new names.
		if v.Type.IsMemory() {
			continue
		}
		// Leaf types may have debug locations
		if !isAlreadyExpandedAggregateType(v.Type) {
			for _, l := range locs {
				f.NamedValues[l] = append(f.NamedValues[l], v)
			}
			f.Names = append(f.Names, locs...)
			continue
		}
		// Not-leaf types that had debug locations need to lose them.
		if ns, ok := namedSelects[v]; ok {
			toDelete = append(toDelete, ns...)
		}
	}

	deleteNamedVals(f, toDelete)
   940  
	// Step 4: rewrite the calls themselves, correcting the type.
	// All results have been turned into loads from memory, so each call now
	// produces only memory and can become its non-LE (non-late-expansion) form.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticLECall:
				v.Op = OpStaticCall
				v.Type = types.TypeMem
			case OpClosureLECall:
				v.Op = OpClosureCall
				v.Type = types.TypeMem
			case OpInterLECall:
				v.Op = OpInterCall
				v.Type = types.TypeMem
			}
		}
	}
   957  
	// Step 5: elide any copies introduced.
	// Redirect every use of a Copy to the copy's ultimate source, then reclaim
	// any Copy values that became dead along the chain.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, a := range v.Args {
				if a.Op != OpCopy {
					continue
				}
				aa := copySource(a) // follows a chain of copies to the real value
				v.SetArg(i, aa)
				// Free up each now-unused copy in the chain.
				for a.Uses == 0 {
					b := a.Args[0]
					a.reset(OpInvalid)
					a = b
				}
			}
		}
	}
}
   976  

View as plain text