Source file src/cmd/compile/internal/arm64/ssa.go

Documentation: cmd/compile/internal/arm64

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/gc"
    11  	"cmd/compile/internal/logopt"
    12  	"cmd/compile/internal/ssa"
    13  	"cmd/compile/internal/types"
    14  	"cmd/internal/obj"
    15  	"cmd/internal/obj/arm64"
    16  )
    17  
    18  // loadByType returns the load instruction of the given type.
    19  func loadByType(t *types.Type) obj.As {
    20  	if t.IsFloat() {
    21  		switch t.Size() {
    22  		case 4:
    23  			return arm64.AFMOVS
    24  		case 8:
    25  			return arm64.AFMOVD
    26  		}
    27  	} else {
    28  		switch t.Size() {
    29  		case 1:
    30  			if t.IsSigned() {
    31  				return arm64.AMOVB
    32  			} else {
    33  				return arm64.AMOVBU
    34  			}
    35  		case 2:
    36  			if t.IsSigned() {
    37  				return arm64.AMOVH
    38  			} else {
    39  				return arm64.AMOVHU
    40  			}
    41  		case 4:
    42  			if t.IsSigned() {
    43  				return arm64.AMOVW
    44  			} else {
    45  				return arm64.AMOVWU
    46  			}
    47  		case 8:
    48  			return arm64.AMOVD
    49  		}
    50  	}
    51  	panic("bad load type")
    52  }
    53  
    54  // storeByType returns the store instruction of the given type.
    55  func storeByType(t *types.Type) obj.As {
    56  	if t.IsFloat() {
    57  		switch t.Size() {
    58  		case 4:
    59  			return arm64.AFMOVS
    60  		case 8:
    61  			return arm64.AFMOVD
    62  		}
    63  	} else {
    64  		switch t.Size() {
    65  		case 1:
    66  			return arm64.AMOVB
    67  		case 2:
    68  			return arm64.AMOVH
    69  		case 4:
    70  			return arm64.AMOVW
    71  		case 8:
    72  			return arm64.AMOVD
    73  		}
    74  	}
    75  	panic("bad store type")
    76  }
    77  
     78  // makeshift encodes a register shifted by a constant, for use as an Offset in a Prog.
    79  func makeshift(reg int16, typ int64, s int64) int64 {
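         	// The register number occupies bits 16-20 and the shift amount bits 10-15;
         	// typ (one of the arm64.SHIFT_* constants) supplies the shift-type bits.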
    80  	return int64(reg&31)<<16 | typ | (s&63)<<10
    81  }
    82  
    83  // genshift generates a Prog for r = r0 op (r1 shifted by n)
    84  func genshift(s *gc.SSAGenState, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
    85  	p := s.Prog(as)
    86  	p.From.Type = obj.TYPE_SHIFT
    87  	p.From.Offset = makeshift(r1, typ, n)
    88  	p.Reg = r0
    89  	if r != 0 {
    90  		p.To.Type = obj.TYPE_REG
    91  		p.To.Reg = r
    92  	}
    93  	return p
    94  }
    95  
     96  // genIndexedOperand generates the memory operand for the indexed load/store instructions.
    97  func genIndexedOperand(v *ssa.Value) obj.Addr {
    98  	// Reg: base register, Index: (shifted) index register
    99  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   100  	switch v.Op {
   101  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
   102  		mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
   103  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
   104  		mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
   105  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
   106  		mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
   107  	default: // not shifted
   108  		mop.Index = v.Args[1].Reg()
   109  	}
   110  	return mop
   111  }
   112  
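         // ssaGenValue emits the machine instructions for a single SSA value v.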
   113  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   114  	switch v.Op {
   115  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   116  		if v.Type.IsMemory() {
   117  			return
   118  		}
   119  		x := v.Args[0].Reg()
   120  		y := v.Reg()
   121  		if x == y {
   122  			return
   123  		}
   124  		as := arm64.AMOVD
   125  		if v.Type.IsFloat() {
   126  			switch v.Type.Size() {
   127  			case 4:
   128  				as = arm64.AFMOVS
   129  			case 8:
   130  				as = arm64.AFMOVD
   131  			default:
   132  				panic("bad float size")
   133  			}
   134  		}
   135  		p := s.Prog(as)
   136  		p.From.Type = obj.TYPE_REG
   137  		p.From.Reg = x
   138  		p.To.Type = obj.TYPE_REG
   139  		p.To.Reg = y
   140  	case ssa.OpARM64MOVDnop:
   141  		if v.Reg() != v.Args[0].Reg() {
   142  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   143  		}
   144  		// nothing to do
   145  	case ssa.OpLoadReg:
   146  		if v.Type.IsFlags() {
   147  			v.Fatalf("load flags not implemented: %v", v.LongString())
   148  			return
   149  		}
   150  		p := s.Prog(loadByType(v.Type))
   151  		gc.AddrAuto(&p.From, v.Args[0])
   152  		p.To.Type = obj.TYPE_REG
   153  		p.To.Reg = v.Reg()
   154  	case ssa.OpStoreReg:
   155  		if v.Type.IsFlags() {
   156  			v.Fatalf("store flags not implemented: %v", v.LongString())
   157  			return
   158  		}
   159  		p := s.Prog(storeByType(v.Type))
   160  		p.From.Type = obj.TYPE_REG
   161  		p.From.Reg = v.Args[0].Reg()
   162  		gc.AddrAuto(&p.To, v)
   163  	case ssa.OpARM64ADD,
   164  		ssa.OpARM64SUB,
   165  		ssa.OpARM64AND,
   166  		ssa.OpARM64OR,
   167  		ssa.OpARM64XOR,
   168  		ssa.OpARM64BIC,
   169  		ssa.OpARM64EON,
   170  		ssa.OpARM64ORN,
   171  		ssa.OpARM64MUL,
   172  		ssa.OpARM64MULW,
   173  		ssa.OpARM64MNEG,
   174  		ssa.OpARM64MNEGW,
   175  		ssa.OpARM64MULH,
   176  		ssa.OpARM64UMULH,
   177  		ssa.OpARM64MULL,
   178  		ssa.OpARM64UMULL,
   179  		ssa.OpARM64DIV,
   180  		ssa.OpARM64UDIV,
   181  		ssa.OpARM64DIVW,
   182  		ssa.OpARM64UDIVW,
   183  		ssa.OpARM64MOD,
   184  		ssa.OpARM64UMOD,
   185  		ssa.OpARM64MODW,
   186  		ssa.OpARM64UMODW,
   187  		ssa.OpARM64SLL,
   188  		ssa.OpARM64SRL,
   189  		ssa.OpARM64SRA,
   190  		ssa.OpARM64FADDS,
   191  		ssa.OpARM64FADDD,
   192  		ssa.OpARM64FSUBS,
   193  		ssa.OpARM64FSUBD,
   194  		ssa.OpARM64FMULS,
   195  		ssa.OpARM64FMULD,
   196  		ssa.OpARM64FNMULS,
   197  		ssa.OpARM64FNMULD,
   198  		ssa.OpARM64FDIVS,
   199  		ssa.OpARM64FDIVD,
   200  		ssa.OpARM64ROR,
   201  		ssa.OpARM64RORW:
   202  		r := v.Reg()
   203  		r1 := v.Args[0].Reg()
   204  		r2 := v.Args[1].Reg()
   205  		p := s.Prog(v.Op.Asm())
   206  		p.From.Type = obj.TYPE_REG
   207  		p.From.Reg = r2
   208  		p.Reg = r1
   209  		p.To.Type = obj.TYPE_REG
   210  		p.To.Reg = r
   211  	case ssa.OpARM64FMADDS,
   212  		ssa.OpARM64FMADDD,
   213  		ssa.OpARM64FNMADDS,
   214  		ssa.OpARM64FNMADDD,
   215  		ssa.OpARM64FMSUBS,
   216  		ssa.OpARM64FMSUBD,
   217  		ssa.OpARM64FNMSUBS,
   218  		ssa.OpARM64FNMSUBD,
   219  		ssa.OpARM64MADD,
   220  		ssa.OpARM64MADDW,
   221  		ssa.OpARM64MSUB,
   222  		ssa.OpARM64MSUBW:
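         		// Args[0] is the accumulator (ra), Args[1] and Args[2] are the multiplicands.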
   223  		rt := v.Reg()
   224  		ra := v.Args[0].Reg()
   225  		rm := v.Args[1].Reg()
   226  		rn := v.Args[2].Reg()
   227  		p := s.Prog(v.Op.Asm())
   228  		p.Reg = ra
   229  		p.From.Type = obj.TYPE_REG
   230  		p.From.Reg = rm
   231  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: rn})
   232  		p.To.Type = obj.TYPE_REG
   233  		p.To.Reg = rt
   234  	case ssa.OpARM64ADDconst,
   235  		ssa.OpARM64SUBconst,
   236  		ssa.OpARM64ANDconst,
   237  		ssa.OpARM64ORconst,
   238  		ssa.OpARM64XORconst,
   239  		ssa.OpARM64SLLconst,
   240  		ssa.OpARM64SRLconst,
   241  		ssa.OpARM64SRAconst,
   242  		ssa.OpARM64RORconst,
   243  		ssa.OpARM64RORWconst:
   244  		p := s.Prog(v.Op.Asm())
   245  		p.From.Type = obj.TYPE_CONST
   246  		p.From.Offset = v.AuxInt
   247  		p.Reg = v.Args[0].Reg()
   248  		p.To.Type = obj.TYPE_REG
   249  		p.To.Reg = v.Reg()
   250  	case ssa.OpARM64ADDSconstflags:
   251  		p := s.Prog(v.Op.Asm())
   252  		p.From.Type = obj.TYPE_CONST
   253  		p.From.Offset = v.AuxInt
   254  		p.Reg = v.Args[0].Reg()
   255  		p.To.Type = obj.TYPE_REG
   256  		p.To.Reg = v.Reg0()
   257  	case ssa.OpARM64ADCzerocarry:
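         		// ADC with both source operands ZR materializes the carry flag as 0 or 1 in the result register.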
   258  		p := s.Prog(v.Op.Asm())
   259  		p.From.Type = obj.TYPE_REG
   260  		p.From.Reg = arm64.REGZERO
   261  		p.Reg = arm64.REGZERO
   262  		p.To.Type = obj.TYPE_REG
   263  		p.To.Reg = v.Reg()
   264  	case ssa.OpARM64ADCSflags,
   265  		ssa.OpARM64ADDSflags,
   266  		ssa.OpARM64SBCSflags,
   267  		ssa.OpARM64SUBSflags:
   268  		r := v.Reg0()
   269  		r1 := v.Args[0].Reg()
   270  		r2 := v.Args[1].Reg()
   271  		p := s.Prog(v.Op.Asm())
   272  		p.From.Type = obj.TYPE_REG
   273  		p.From.Reg = r2
   274  		p.Reg = r1
   275  		p.To.Type = obj.TYPE_REG
   276  		p.To.Reg = r
   277  	case ssa.OpARM64NEGSflags:
   278  		p := s.Prog(v.Op.Asm())
   279  		p.From.Type = obj.TYPE_REG
   280  		p.From.Reg = v.Args[0].Reg()
   281  		p.To.Type = obj.TYPE_REG
   282  		p.To.Reg = v.Reg0()
   283  	case ssa.OpARM64NGCzerocarry:
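         		// NGC with source operand ZR gives 0 if the carry flag is set, -1 if it is clear.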
   284  		p := s.Prog(v.Op.Asm())
   285  		p.From.Type = obj.TYPE_REG
   286  		p.From.Reg = arm64.REGZERO
   287  		p.To.Type = obj.TYPE_REG
   288  		p.To.Reg = v.Reg()
   289  	case ssa.OpARM64EXTRconst,
   290  		ssa.OpARM64EXTRWconst:
   291  		p := s.Prog(v.Op.Asm())
   292  		p.From.Type = obj.TYPE_CONST
   293  		p.From.Offset = v.AuxInt
   294  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
   295  		p.Reg = v.Args[1].Reg()
   296  		p.To.Type = obj.TYPE_REG
   297  		p.To.Reg = v.Reg()
   298  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   299  		genshift(s, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   300  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   301  		genshift(s, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   302  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   303  		genshift(s, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   304  	case ssa.OpARM64ADDshiftLL,
   305  		ssa.OpARM64SUBshiftLL,
   306  		ssa.OpARM64ANDshiftLL,
   307  		ssa.OpARM64ORshiftLL,
   308  		ssa.OpARM64XORshiftLL,
   309  		ssa.OpARM64EONshiftLL,
   310  		ssa.OpARM64ORNshiftLL,
   311  		ssa.OpARM64BICshiftLL:
   312  		genshift(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   313  	case ssa.OpARM64ADDshiftRL,
   314  		ssa.OpARM64SUBshiftRL,
   315  		ssa.OpARM64ANDshiftRL,
   316  		ssa.OpARM64ORshiftRL,
   317  		ssa.OpARM64XORshiftRL,
   318  		ssa.OpARM64EONshiftRL,
   319  		ssa.OpARM64ORNshiftRL,
   320  		ssa.OpARM64BICshiftRL:
   321  		genshift(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   322  	case ssa.OpARM64ADDshiftRA,
   323  		ssa.OpARM64SUBshiftRA,
   324  		ssa.OpARM64ANDshiftRA,
   325  		ssa.OpARM64ORshiftRA,
   326  		ssa.OpARM64XORshiftRA,
   327  		ssa.OpARM64EONshiftRA,
   328  		ssa.OpARM64ORNshiftRA,
   329  		ssa.OpARM64BICshiftRA:
   330  		genshift(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   331  	case ssa.OpARM64MOVDconst:
   332  		p := s.Prog(v.Op.Asm())
   333  		p.From.Type = obj.TYPE_CONST
   334  		p.From.Offset = v.AuxInt
   335  		p.To.Type = obj.TYPE_REG
   336  		p.To.Reg = v.Reg()
   337  	case ssa.OpARM64FMOVSconst,
   338  		ssa.OpARM64FMOVDconst:
   339  		p := s.Prog(v.Op.Asm())
   340  		p.From.Type = obj.TYPE_FCONST
   341  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   342  		p.To.Type = obj.TYPE_REG
   343  		p.To.Reg = v.Reg()
   344  	case ssa.OpARM64FCMPS0,
   345  		ssa.OpARM64FCMPD0:
   346  		p := s.Prog(v.Op.Asm())
   347  		p.From.Type = obj.TYPE_FCONST
   348  		p.From.Val = math.Float64frombits(0)
   349  		p.Reg = v.Args[0].Reg()
   350  	case ssa.OpARM64CMP,
   351  		ssa.OpARM64CMPW,
   352  		ssa.OpARM64CMN,
   353  		ssa.OpARM64CMNW,
   354  		ssa.OpARM64TST,
   355  		ssa.OpARM64TSTW,
   356  		ssa.OpARM64FCMPS,
   357  		ssa.OpARM64FCMPD:
   358  		p := s.Prog(v.Op.Asm())
   359  		p.From.Type = obj.TYPE_REG
   360  		p.From.Reg = v.Args[1].Reg()
   361  		p.Reg = v.Args[0].Reg()
   362  	case ssa.OpARM64CMPconst,
   363  		ssa.OpARM64CMPWconst,
   364  		ssa.OpARM64CMNconst,
   365  		ssa.OpARM64CMNWconst,
   366  		ssa.OpARM64TSTconst,
   367  		ssa.OpARM64TSTWconst:
   368  		p := s.Prog(v.Op.Asm())
   369  		p.From.Type = obj.TYPE_CONST
   370  		p.From.Offset = v.AuxInt
   371  		p.Reg = v.Args[0].Reg()
   372  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   373  		genshift(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   374  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   375  		genshift(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   376  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   377  		genshift(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   378  	case ssa.OpARM64MOVDaddr:
   379  		p := s.Prog(arm64.AMOVD)
   380  		p.From.Type = obj.TYPE_ADDR
   381  		p.From.Reg = v.Args[0].Reg()
   382  		p.To.Type = obj.TYPE_REG
   383  		p.To.Reg = v.Reg()
   384  
   385  		var wantreg string
   386  		// MOVD $sym+off(base), R
    387  	// the assembler expands it as follows:
    388  	// - base is SP: add constant offset to SP
    389  	//               when the constant is large, a temporary register may be used
   390  		// - base is SB: load external address from constant pool (use relocation)
   391  		switch v.Aux.(type) {
   392  		default:
   393  			v.Fatalf("aux is of unknown type %T", v.Aux)
   394  		case *obj.LSym:
   395  			wantreg = "SB"
   396  			gc.AddAux(&p.From, v)
   397  		case *gc.Node:
   398  			wantreg = "SP"
   399  			gc.AddAux(&p.From, v)
   400  		case nil:
   401  			// No sym, just MOVD $off(SP), R
   402  			wantreg = "SP"
   403  			p.From.Offset = v.AuxInt
   404  		}
   405  		if reg := v.Args[0].RegName(); reg != wantreg {
   406  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   407  		}
   408  	case ssa.OpARM64MOVBload,
   409  		ssa.OpARM64MOVBUload,
   410  		ssa.OpARM64MOVHload,
   411  		ssa.OpARM64MOVHUload,
   412  		ssa.OpARM64MOVWload,
   413  		ssa.OpARM64MOVWUload,
   414  		ssa.OpARM64MOVDload,
   415  		ssa.OpARM64FMOVSload,
   416  		ssa.OpARM64FMOVDload:
   417  		p := s.Prog(v.Op.Asm())
   418  		p.From.Type = obj.TYPE_MEM
   419  		p.From.Reg = v.Args[0].Reg()
   420  		gc.AddAux(&p.From, v)
   421  		p.To.Type = obj.TYPE_REG
   422  		p.To.Reg = v.Reg()
   423  	case ssa.OpARM64MOVBloadidx,
   424  		ssa.OpARM64MOVBUloadidx,
   425  		ssa.OpARM64MOVHloadidx,
   426  		ssa.OpARM64MOVHUloadidx,
   427  		ssa.OpARM64MOVWloadidx,
   428  		ssa.OpARM64MOVWUloadidx,
   429  		ssa.OpARM64MOVDloadidx,
   430  		ssa.OpARM64FMOVSloadidx,
   431  		ssa.OpARM64FMOVDloadidx,
   432  		ssa.OpARM64MOVHloadidx2,
   433  		ssa.OpARM64MOVHUloadidx2,
   434  		ssa.OpARM64MOVWloadidx4,
   435  		ssa.OpARM64MOVWUloadidx4,
   436  		ssa.OpARM64MOVDloadidx8:
   437  		p := s.Prog(v.Op.Asm())
   438  		p.From = genIndexedOperand(v)
   439  		p.To.Type = obj.TYPE_REG
   440  		p.To.Reg = v.Reg()
   441  	case ssa.OpARM64LDAR,
   442  		ssa.OpARM64LDARB,
   443  		ssa.OpARM64LDARW:
   444  		p := s.Prog(v.Op.Asm())
   445  		p.From.Type = obj.TYPE_MEM
   446  		p.From.Reg = v.Args[0].Reg()
   447  		gc.AddAux(&p.From, v)
   448  		p.To.Type = obj.TYPE_REG
   449  		p.To.Reg = v.Reg0()
   450  	case ssa.OpARM64MOVBstore,
   451  		ssa.OpARM64MOVHstore,
   452  		ssa.OpARM64MOVWstore,
   453  		ssa.OpARM64MOVDstore,
   454  		ssa.OpARM64FMOVSstore,
   455  		ssa.OpARM64FMOVDstore,
   456  		ssa.OpARM64STLRB,
   457  		ssa.OpARM64STLR,
   458  		ssa.OpARM64STLRW:
   459  		p := s.Prog(v.Op.Asm())
   460  		p.From.Type = obj.TYPE_REG
   461  		p.From.Reg = v.Args[1].Reg()
   462  		p.To.Type = obj.TYPE_MEM
   463  		p.To.Reg = v.Args[0].Reg()
   464  		gc.AddAux(&p.To, v)
   465  	case ssa.OpARM64MOVBstoreidx,
   466  		ssa.OpARM64MOVHstoreidx,
   467  		ssa.OpARM64MOVWstoreidx,
   468  		ssa.OpARM64MOVDstoreidx,
   469  		ssa.OpARM64FMOVSstoreidx,
   470  		ssa.OpARM64FMOVDstoreidx,
   471  		ssa.OpARM64MOVHstoreidx2,
   472  		ssa.OpARM64MOVWstoreidx4,
   473  		ssa.OpARM64MOVDstoreidx8:
   474  		p := s.Prog(v.Op.Asm())
   475  		p.To = genIndexedOperand(v)
   476  		p.From.Type = obj.TYPE_REG
   477  		p.From.Reg = v.Args[2].Reg()
   478  	case ssa.OpARM64STP:
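         		// Store the register pair (Args[1], Args[2]) to the address in Args[0].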
   479  		p := s.Prog(v.Op.Asm())
   480  		p.From.Type = obj.TYPE_REGREG
   481  		p.From.Reg = v.Args[1].Reg()
   482  		p.From.Offset = int64(v.Args[2].Reg())
   483  		p.To.Type = obj.TYPE_MEM
   484  		p.To.Reg = v.Args[0].Reg()
   485  		gc.AddAux(&p.To, v)
   486  	case ssa.OpARM64MOVBstorezero,
   487  		ssa.OpARM64MOVHstorezero,
   488  		ssa.OpARM64MOVWstorezero,
   489  		ssa.OpARM64MOVDstorezero:
   490  		p := s.Prog(v.Op.Asm())
   491  		p.From.Type = obj.TYPE_REG
   492  		p.From.Reg = arm64.REGZERO
   493  		p.To.Type = obj.TYPE_MEM
   494  		p.To.Reg = v.Args[0].Reg()
   495  		gc.AddAux(&p.To, v)
   496  	case ssa.OpARM64MOVBstorezeroidx,
   497  		ssa.OpARM64MOVHstorezeroidx,
   498  		ssa.OpARM64MOVWstorezeroidx,
   499  		ssa.OpARM64MOVDstorezeroidx,
   500  		ssa.OpARM64MOVHstorezeroidx2,
   501  		ssa.OpARM64MOVWstorezeroidx4,
   502  		ssa.OpARM64MOVDstorezeroidx8:
   503  		p := s.Prog(v.Op.Asm())
   504  		p.To = genIndexedOperand(v)
   505  		p.From.Type = obj.TYPE_REG
   506  		p.From.Reg = arm64.REGZERO
   507  	case ssa.OpARM64MOVQstorezero:
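         		// Zero 16 bytes at once by storing the pair (ZR, ZR).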
   508  		p := s.Prog(v.Op.Asm())
   509  		p.From.Type = obj.TYPE_REGREG
   510  		p.From.Reg = arm64.REGZERO
   511  		p.From.Offset = int64(arm64.REGZERO)
   512  		p.To.Type = obj.TYPE_MEM
   513  		p.To.Reg = v.Args[0].Reg()
   514  		gc.AddAux(&p.To, v)
   515  	case ssa.OpARM64BFI,
   516  		ssa.OpARM64BFXIL:
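         		// AuxInt encodes the bitfield as lsb<<8 | width.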
   517  		r := v.Reg()
   518  		if r != v.Args[0].Reg() {
   519  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   520  		}
   521  		p := s.Prog(v.Op.Asm())
   522  		p.From.Type = obj.TYPE_CONST
   523  		p.From.Offset = v.AuxInt >> 8
   524  		p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt & 0xff})
   525  		p.Reg = v.Args[1].Reg()
   526  		p.To.Type = obj.TYPE_REG
   527  		p.To.Reg = r
   528  	case ssa.OpARM64SBFIZ,
   529  		ssa.OpARM64SBFX,
   530  		ssa.OpARM64UBFIZ,
   531  		ssa.OpARM64UBFX:
   532  		p := s.Prog(v.Op.Asm())
   533  		p.From.Type = obj.TYPE_CONST
   534  		p.From.Offset = v.AuxInt >> 8
   535  		p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt & 0xff})
   536  		p.Reg = v.Args[0].Reg()
   537  		p.To.Type = obj.TYPE_REG
   538  		p.To.Reg = v.Reg()
   539  	case ssa.OpARM64LoweredMuluhilo:
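         		// 64x64 -> 128 bit multiply: UMULH produces the high 64 bits (Reg0),
         		// MUL produces the low 64 bits (Reg1).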
   540  		r0 := v.Args[0].Reg()
   541  		r1 := v.Args[1].Reg()
   542  		p := s.Prog(arm64.AUMULH)
   543  		p.From.Type = obj.TYPE_REG
   544  		p.From.Reg = r1
   545  		p.Reg = r0
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = v.Reg0()
   548  		p1 := s.Prog(arm64.AMUL)
   549  		p1.From.Type = obj.TYPE_REG
   550  		p1.From.Reg = r1
   551  		p1.Reg = r0
   552  		p1.To.Type = obj.TYPE_REG
   553  		p1.To.Reg = v.Reg1()
   554  	case ssa.OpARM64LoweredAtomicExchange64,
   555  		ssa.OpARM64LoweredAtomicExchange32:
   556  		// LDAXR	(Rarg0), Rout
   557  		// STLXR	Rarg1, (Rarg0), Rtmp
   558  		// CBNZ		Rtmp, -2(PC)
   559  		ld := arm64.ALDAXR
   560  		st := arm64.ASTLXR
   561  		if v.Op == ssa.OpARM64LoweredAtomicExchange32 {
   562  			ld = arm64.ALDAXRW
   563  			st = arm64.ASTLXRW
   564  		}
   565  		r0 := v.Args[0].Reg()
   566  		r1 := v.Args[1].Reg()
   567  		out := v.Reg0()
   568  		p := s.Prog(ld)
   569  		p.From.Type = obj.TYPE_MEM
   570  		p.From.Reg = r0
   571  		p.To.Type = obj.TYPE_REG
   572  		p.To.Reg = out
   573  		p1 := s.Prog(st)
   574  		p1.From.Type = obj.TYPE_REG
   575  		p1.From.Reg = r1
   576  		p1.To.Type = obj.TYPE_MEM
   577  		p1.To.Reg = r0
   578  		p1.RegTo2 = arm64.REGTMP
   579  		p2 := s.Prog(arm64.ACBNZ)
   580  		p2.From.Type = obj.TYPE_REG
   581  		p2.From.Reg = arm64.REGTMP
   582  		p2.To.Type = obj.TYPE_BRANCH
   583  		gc.Patch(p2, p)
   584  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   585  		ssa.OpARM64LoweredAtomicExchange32Variant:
   586  		swap := arm64.ASWPALD
   587  		if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
   588  			swap = arm64.ASWPALW
   589  		}
   590  		r0 := v.Args[0].Reg()
   591  		r1 := v.Args[1].Reg()
   592  		out := v.Reg0()
   593  
   594  		// SWPALD	Rarg1, (Rarg0), Rout
   595  		p := s.Prog(swap)
   596  		p.From.Type = obj.TYPE_REG
   597  		p.From.Reg = r1
   598  		p.To.Type = obj.TYPE_MEM
   599  		p.To.Reg = r0
   600  		p.RegTo2 = out
   601  
   602  	case ssa.OpARM64LoweredAtomicAdd64,
   603  		ssa.OpARM64LoweredAtomicAdd32:
   604  		// LDAXR	(Rarg0), Rout
   605  		// ADD		Rarg1, Rout
   606  		// STLXR	Rout, (Rarg0), Rtmp
   607  		// CBNZ		Rtmp, -3(PC)
   608  		ld := arm64.ALDAXR
   609  		st := arm64.ASTLXR
   610  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   611  			ld = arm64.ALDAXRW
   612  			st = arm64.ASTLXRW
   613  		}
   614  		r0 := v.Args[0].Reg()
   615  		r1 := v.Args[1].Reg()
   616  		out := v.Reg0()
   617  		p := s.Prog(ld)
   618  		p.From.Type = obj.TYPE_MEM
   619  		p.From.Reg = r0
   620  		p.To.Type = obj.TYPE_REG
   621  		p.To.Reg = out
   622  		p1 := s.Prog(arm64.AADD)
   623  		p1.From.Type = obj.TYPE_REG
   624  		p1.From.Reg = r1
   625  		p1.To.Type = obj.TYPE_REG
   626  		p1.To.Reg = out
   627  		p2 := s.Prog(st)
   628  		p2.From.Type = obj.TYPE_REG
   629  		p2.From.Reg = out
   630  		p2.To.Type = obj.TYPE_MEM
   631  		p2.To.Reg = r0
   632  		p2.RegTo2 = arm64.REGTMP
   633  		p3 := s.Prog(arm64.ACBNZ)
   634  		p3.From.Type = obj.TYPE_REG
   635  		p3.From.Reg = arm64.REGTMP
   636  		p3.To.Type = obj.TYPE_BRANCH
   637  		gc.Patch(p3, p)
   638  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   639  		ssa.OpARM64LoweredAtomicAdd32Variant:
   640  		// LDADDAL	Rarg1, (Rarg0), Rout
   641  		// ADD		Rarg1, Rout
   642  		op := arm64.ALDADDALD
   643  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   644  			op = arm64.ALDADDALW
   645  		}
   646  		r0 := v.Args[0].Reg()
   647  		r1 := v.Args[1].Reg()
   648  		out := v.Reg0()
   649  		p := s.Prog(op)
   650  		p.From.Type = obj.TYPE_REG
   651  		p.From.Reg = r1
   652  		p.To.Type = obj.TYPE_MEM
   653  		p.To.Reg = r0
   654  		p.RegTo2 = out
   655  		p1 := s.Prog(arm64.AADD)
   656  		p1.From.Type = obj.TYPE_REG
   657  		p1.From.Reg = r1
   658  		p1.To.Type = obj.TYPE_REG
   659  		p1.To.Reg = out
   660  	case ssa.OpARM64LoweredAtomicCas64,
   661  		ssa.OpARM64LoweredAtomicCas32:
   662  		// LDAXR	(Rarg0), Rtmp
   663  		// CMP		Rarg1, Rtmp
   664  		// BNE		3(PC)
   665  		// STLXR	Rarg2, (Rarg0), Rtmp
   666  		// CBNZ		Rtmp, -4(PC)
   667  		// CSET		EQ, Rout
   668  		ld := arm64.ALDAXR
   669  		st := arm64.ASTLXR
   670  		cmp := arm64.ACMP
   671  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   672  			ld = arm64.ALDAXRW
   673  			st = arm64.ASTLXRW
   674  			cmp = arm64.ACMPW
   675  		}
   676  		r0 := v.Args[0].Reg()
   677  		r1 := v.Args[1].Reg()
   678  		r2 := v.Args[2].Reg()
   679  		out := v.Reg0()
   680  		p := s.Prog(ld)
   681  		p.From.Type = obj.TYPE_MEM
   682  		p.From.Reg = r0
   683  		p.To.Type = obj.TYPE_REG
   684  		p.To.Reg = arm64.REGTMP
   685  		p1 := s.Prog(cmp)
   686  		p1.From.Type = obj.TYPE_REG
   687  		p1.From.Reg = r1
   688  		p1.Reg = arm64.REGTMP
   689  		p2 := s.Prog(arm64.ABNE)
   690  		p2.To.Type = obj.TYPE_BRANCH
   691  		p3 := s.Prog(st)
   692  		p3.From.Type = obj.TYPE_REG
   693  		p3.From.Reg = r2
   694  		p3.To.Type = obj.TYPE_MEM
   695  		p3.To.Reg = r0
   696  		p3.RegTo2 = arm64.REGTMP
   697  		p4 := s.Prog(arm64.ACBNZ)
   698  		p4.From.Type = obj.TYPE_REG
   699  		p4.From.Reg = arm64.REGTMP
   700  		p4.To.Type = obj.TYPE_BRANCH
   701  		gc.Patch(p4, p)
   702  		p5 := s.Prog(arm64.ACSET)
   703  		p5.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
   704  		p5.From.Reg = arm64.COND_EQ
   705  		p5.To.Type = obj.TYPE_REG
   706  		p5.To.Reg = out
   707  		gc.Patch(p2, p5)
   708  	case ssa.OpARM64LoweredAtomicCas64Variant,
   709  		ssa.OpARM64LoweredAtomicCas32Variant:
   710  		// Rarg0: ptr
   711  		// Rarg1: old
   712  		// Rarg2: new
   713  		// MOV  	Rarg1, Rtmp
   714  		// CASAL	Rtmp, (Rarg0), Rarg2
   715  		// CMP  	Rarg1, Rtmp
   716  		// CSET 	EQ, Rout
   717  		cas := arm64.ACASALD
   718  		cmp := arm64.ACMP
   719  		mov := arm64.AMOVD
   720  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   721  			cas = arm64.ACASALW
   722  			cmp = arm64.ACMPW
   723  			mov = arm64.AMOVW
   724  		}
   725  		r0 := v.Args[0].Reg()
   726  		r1 := v.Args[1].Reg()
   727  		r2 := v.Args[2].Reg()
   728  		out := v.Reg0()
   729  
   730  		// MOV  	Rarg1, Rtmp
   731  		p := s.Prog(mov)
   732  		p.From.Type = obj.TYPE_REG
   733  		p.From.Reg = r1
   734  		p.To.Type = obj.TYPE_REG
   735  		p.To.Reg = arm64.REGTMP
   736  
   737  		// CASAL	Rtmp, (Rarg0), Rarg2
   738  		p1 := s.Prog(cas)
   739  		p1.From.Type = obj.TYPE_REG
   740  		p1.From.Reg = arm64.REGTMP
   741  		p1.To.Type = obj.TYPE_MEM
   742  		p1.To.Reg = r0
   743  		p1.RegTo2 = r2
   744  
   745  		// CMP  	Rarg1, Rtmp
   746  		p2 := s.Prog(cmp)
   747  		p2.From.Type = obj.TYPE_REG
   748  		p2.From.Reg = r1
   749  		p2.Reg = arm64.REGTMP
   750  
   751  		// CSET 	EQ, Rout
   752  		p3 := s.Prog(arm64.ACSET)
   753  		p3.From.Type = obj.TYPE_REG
   754  		p3.From.Reg = arm64.COND_EQ
   755  		p3.To.Type = obj.TYPE_REG
   756  		p3.To.Reg = out
   757  
   758  	case ssa.OpARM64LoweredAtomicAnd8,
   759  		ssa.OpARM64LoweredAtomicAnd32,
   760  		ssa.OpARM64LoweredAtomicOr8,
   761  		ssa.OpARM64LoweredAtomicOr32:
   762  		// LDAXRB/LDAXRW (Rarg0), Rout
   763  		// AND/OR	Rarg1, Rout
    764  		// STLXRB/STLXRW Rout, (Rarg0), Rtmp
   765  		// CBNZ		Rtmp, -3(PC)
   766  		ld := arm64.ALDAXRB
   767  		st := arm64.ASTLXRB
   768  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   769  			ld = arm64.ALDAXRW
   770  			st = arm64.ASTLXRW
   771  		}
   772  		r0 := v.Args[0].Reg()
   773  		r1 := v.Args[1].Reg()
   774  		out := v.Reg0()
   775  		p := s.Prog(ld)
   776  		p.From.Type = obj.TYPE_MEM
   777  		p.From.Reg = r0
   778  		p.To.Type = obj.TYPE_REG
   779  		p.To.Reg = out
   780  		p1 := s.Prog(v.Op.Asm())
   781  		p1.From.Type = obj.TYPE_REG
   782  		p1.From.Reg = r1
   783  		p1.To.Type = obj.TYPE_REG
   784  		p1.To.Reg = out
   785  		p2 := s.Prog(st)
   786  		p2.From.Type = obj.TYPE_REG
   787  		p2.From.Reg = out
   788  		p2.To.Type = obj.TYPE_MEM
   789  		p2.To.Reg = r0
   790  		p2.RegTo2 = arm64.REGTMP
   791  		p3 := s.Prog(arm64.ACBNZ)
   792  		p3.From.Type = obj.TYPE_REG
   793  		p3.From.Reg = arm64.REGTMP
   794  		p3.To.Type = obj.TYPE_BRANCH
   795  		gc.Patch(p3, p)
   796  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   797  		ssa.OpARM64LoweredAtomicAnd32Variant:
   798  		atomic_clear := arm64.ALDCLRALW
   799  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   800  			atomic_clear = arm64.ALDCLRALB
   801  		}
   802  		r0 := v.Args[0].Reg()
   803  		r1 := v.Args[1].Reg()
   804  		out := v.Reg0()
   805  
    806  		// MVN       Rarg1, Rtemp
   807  		p := s.Prog(arm64.AMVN)
   808  		p.From.Type = obj.TYPE_REG
   809  		p.From.Reg = r1
   810  		p.To.Type = obj.TYPE_REG
   811  		p.To.Reg = arm64.REGTMP
   812  
   813  		// LDCLRALW  Rtemp, (Rarg0), Rout
   814  		p1 := s.Prog(atomic_clear)
   815  		p1.From.Type = obj.TYPE_REG
   816  		p1.From.Reg = arm64.REGTMP
   817  		p1.To.Type = obj.TYPE_MEM
   818  		p1.To.Reg = r0
   819  		p1.RegTo2 = out
   820  
   821  		// AND       Rarg1, Rout
   822  		p2 := s.Prog(arm64.AAND)
   823  		p2.From.Type = obj.TYPE_REG
   824  		p2.From.Reg = r1
   825  		p2.To.Type = obj.TYPE_REG
   826  		p2.To.Reg = out
   827  
   828  	case ssa.OpARM64LoweredAtomicOr8Variant,
   829  		ssa.OpARM64LoweredAtomicOr32Variant:
   830  		atomic_or := arm64.ALDORALW
   831  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   832  			atomic_or = arm64.ALDORALB
   833  		}
   834  		r0 := v.Args[0].Reg()
   835  		r1 := v.Args[1].Reg()
   836  		out := v.Reg0()
   837  
   838  		// LDORALW  Rarg1, (Rarg0), Rout
   839  		p := s.Prog(atomic_or)
   840  		p.From.Type = obj.TYPE_REG
   841  		p.From.Reg = r1
   842  		p.To.Type = obj.TYPE_MEM
   843  		p.To.Reg = r0
   844  		p.RegTo2 = out
   845  
   846  		// ORR       Rarg1, Rout
   847  		p2 := s.Prog(arm64.AORR)
   848  		p2.From.Type = obj.TYPE_REG
   849  		p2.From.Reg = r1
   850  		p2.To.Type = obj.TYPE_REG
   851  		p2.To.Reg = out
   852  
   853  	case ssa.OpARM64MOVBreg,
   854  		ssa.OpARM64MOVBUreg,
   855  		ssa.OpARM64MOVHreg,
   856  		ssa.OpARM64MOVHUreg,
   857  		ssa.OpARM64MOVWreg,
   858  		ssa.OpARM64MOVWUreg:
   859  		a := v.Args[0]
   860  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   861  			a = a.Args[0]
   862  		}
   863  		if a.Op == ssa.OpLoadReg {
   864  			t := a.Type
   865  			switch {
   866  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   867  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   868  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   869  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   870  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   871  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
    872  				// arg is a properly typed load, already zero/sign-extended; don't extend again
   873  				if v.Reg() == v.Args[0].Reg() {
   874  					return
   875  				}
   876  				p := s.Prog(arm64.AMOVD)
   877  				p.From.Type = obj.TYPE_REG
   878  				p.From.Reg = v.Args[0].Reg()
   879  				p.To.Type = obj.TYPE_REG
   880  				p.To.Reg = v.Reg()
   881  				return
   882  			default:
   883  			}
   884  		}
   885  		fallthrough
   886  	case ssa.OpARM64MVN,
   887  		ssa.OpARM64NEG,
   888  		ssa.OpARM64FABSD,
   889  		ssa.OpARM64FMOVDfpgp,
   890  		ssa.OpARM64FMOVDgpfp,
   891  		ssa.OpARM64FMOVSfpgp,
   892  		ssa.OpARM64FMOVSgpfp,
   893  		ssa.OpARM64FNEGS,
   894  		ssa.OpARM64FNEGD,
   895  		ssa.OpARM64FSQRTD,
   896  		ssa.OpARM64FCVTZSSW,
   897  		ssa.OpARM64FCVTZSDW,
   898  		ssa.OpARM64FCVTZUSW,
   899  		ssa.OpARM64FCVTZUDW,
   900  		ssa.OpARM64FCVTZSS,
   901  		ssa.OpARM64FCVTZSD,
   902  		ssa.OpARM64FCVTZUS,
   903  		ssa.OpARM64FCVTZUD,
   904  		ssa.OpARM64SCVTFWS,
   905  		ssa.OpARM64SCVTFWD,
   906  		ssa.OpARM64SCVTFS,
   907  		ssa.OpARM64SCVTFD,
   908  		ssa.OpARM64UCVTFWS,
   909  		ssa.OpARM64UCVTFWD,
   910  		ssa.OpARM64UCVTFS,
   911  		ssa.OpARM64UCVTFD,
   912  		ssa.OpARM64FCVTSD,
   913  		ssa.OpARM64FCVTDS,
   914  		ssa.OpARM64REV,
   915  		ssa.OpARM64REVW,
   916  		ssa.OpARM64REV16W,
   917  		ssa.OpARM64RBIT,
   918  		ssa.OpARM64RBITW,
   919  		ssa.OpARM64CLZ,
   920  		ssa.OpARM64CLZW,
   921  		ssa.OpARM64FRINTAD,
   922  		ssa.OpARM64FRINTMD,
   923  		ssa.OpARM64FRINTND,
   924  		ssa.OpARM64FRINTPD,
   925  		ssa.OpARM64FRINTZD:
   926  		p := s.Prog(v.Op.Asm())
   927  		p.From.Type = obj.TYPE_REG
   928  		p.From.Reg = v.Args[0].Reg()
   929  		p.To.Type = obj.TYPE_REG
   930  		p.To.Reg = v.Reg()
   931  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
   932  		// input is already rounded
   933  	case ssa.OpARM64VCNT:
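         		// Count the set bits in each byte lane. The operands are V registers
         		// with an 8B arrangement, derived from the F register numbers below.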
   934  		p := s.Prog(v.Op.Asm())
   935  		p.From.Type = obj.TYPE_REG
   936  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   937  		p.To.Type = obj.TYPE_REG
   938  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   939  	case ssa.OpARM64VUADDLV:
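         		// Unsigned sum across the byte lanes of the source vector; the result
         		// is written to the destination SIMD register.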
   940  		p := s.Prog(v.Op.Asm())
   941  		p.From.Type = obj.TYPE_REG
   942  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   943  		p.To.Type = obj.TYPE_REG
   944  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
   945  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
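         		// Conditional select: the result is Args[0] if the condition encoded
         		// in AuxInt holds, otherwise Args[1] (ZR for CSEL0).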
   946  		r1 := int16(arm64.REGZERO)
   947  		if v.Op != ssa.OpARM64CSEL0 {
   948  			r1 = v.Args[1].Reg()
   949  		}
   950  		p := s.Prog(v.Op.Asm())
   951  		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
   952  		p.From.Reg = condBits[ssa.Op(v.AuxInt)]
   953  		p.Reg = v.Args[0].Reg()
   954  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r1})
   955  		p.To.Type = obj.TYPE_REG
   956  		p.To.Reg = v.Reg()
   957  	case ssa.OpARM64DUFFZERO:
   958  		// runtime.duffzero expects start address in R20
   959  		p := s.Prog(obj.ADUFFZERO)
   960  		p.To.Type = obj.TYPE_MEM
   961  		p.To.Name = obj.NAME_EXTERN
   962  		p.To.Sym = gc.Duffzero
   963  		p.To.Offset = v.AuxInt
   964  	case ssa.OpARM64LoweredZero:
   965  		// STP.P	(ZR,ZR), 16(R16)
   966  		// CMP	Rarg1, R16
   967  		// BLE	-2(PC)
   968  		// arg1 is the address of the last 16-byte unit to zero
   969  		p := s.Prog(arm64.ASTP)
   970  		p.Scond = arm64.C_XPOST
   971  		p.From.Type = obj.TYPE_REGREG
   972  		p.From.Reg = arm64.REGZERO
   973  		p.From.Offset = int64(arm64.REGZERO)
   974  		p.To.Type = obj.TYPE_MEM
   975  		p.To.Reg = arm64.REG_R16
   976  		p.To.Offset = 16
   977  		p2 := s.Prog(arm64.ACMP)
   978  		p2.From.Type = obj.TYPE_REG
   979  		p2.From.Reg = v.Args[1].Reg()
   980  		p2.Reg = arm64.REG_R16
   981  		p3 := s.Prog(arm64.ABLE)
   982  		p3.To.Type = obj.TYPE_BRANCH
   983  		gc.Patch(p3, p)
   984  	case ssa.OpARM64DUFFCOPY:
   985  		p := s.Prog(obj.ADUFFCOPY)
   986  		p.To.Type = obj.TYPE_MEM
   987  		p.To.Name = obj.NAME_EXTERN
   988  		p.To.Sym = gc.Duffcopy
   989  		p.To.Offset = v.AuxInt
   990  	case ssa.OpARM64LoweredMove:
   991  		// MOVD.P	8(R16), Rtmp
   992  		// MOVD.P	Rtmp, 8(R17)
   993  		// CMP	Rarg2, R16
   994  		// BLE	-3(PC)
   995  		// arg2 is the address of the last element of src
   996  		p := s.Prog(arm64.AMOVD)
   997  		p.Scond = arm64.C_XPOST
   998  		p.From.Type = obj.TYPE_MEM
   999  		p.From.Reg = arm64.REG_R16
  1000  		p.From.Offset = 8
  1001  		p.To.Type = obj.TYPE_REG
  1002  		p.To.Reg = arm64.REGTMP
  1003  		p2 := s.Prog(arm64.AMOVD)
  1004  		p2.Scond = arm64.C_XPOST
  1005  		p2.From.Type = obj.TYPE_REG
  1006  		p2.From.Reg = arm64.REGTMP
  1007  		p2.To.Type = obj.TYPE_MEM
  1008  		p2.To.Reg = arm64.REG_R17
  1009  		p2.To.Offset = 8
  1010  		p3 := s.Prog(arm64.ACMP)
  1011  		p3.From.Type = obj.TYPE_REG
  1012  		p3.From.Reg = v.Args[2].Reg()
  1013  		p3.Reg = arm64.REG_R16
  1014  		p4 := s.Prog(arm64.ABLE)
  1015  		p4.To.Type = obj.TYPE_BRANCH
  1016  		gc.Patch(p4, p)
  1017  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1018  		s.Call(v)
  1019  	case ssa.OpARM64LoweredWB:
  1020  		p := s.Prog(obj.ACALL)
  1021  		p.To.Type = obj.TYPE_MEM
  1022  		p.To.Name = obj.NAME_EXTERN
  1023  		p.To.Sym = v.Aux.(*obj.LSym)
  1024  	case ssa.OpARM64LoweredPanicBoundsA, ssa.OpARM64LoweredPanicBoundsB, ssa.OpARM64LoweredPanicBoundsC:
  1025  		p := s.Prog(obj.ACALL)
  1026  		p.To.Type = obj.TYPE_MEM
  1027  		p.To.Name = obj.NAME_EXTERN
  1028  		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
  1029  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1030  	case ssa.OpARM64LoweredNilCheck:
  1031  		// Issue a load which will fault if arg is nil.
  1032  		p := s.Prog(arm64.AMOVB)
  1033  		p.From.Type = obj.TYPE_MEM
  1034  		p.From.Reg = v.Args[0].Reg()
  1035  		gc.AddAux(&p.From, v)
  1036  		p.To.Type = obj.TYPE_REG
  1037  		p.To.Reg = arm64.REGTMP
  1038  		if logopt.Enabled() {
  1039  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1040  		}
   1041  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1042  			gc.Warnl(v.Pos, "generated nil check")
  1043  		}
  1044  	case ssa.OpARM64Equal,
  1045  		ssa.OpARM64NotEqual,
  1046  		ssa.OpARM64LessThan,
  1047  		ssa.OpARM64LessEqual,
  1048  		ssa.OpARM64GreaterThan,
  1049  		ssa.OpARM64GreaterEqual,
  1050  		ssa.OpARM64LessThanU,
  1051  		ssa.OpARM64LessEqualU,
  1052  		ssa.OpARM64GreaterThanU,
  1053  		ssa.OpARM64GreaterEqualU,
  1054  		ssa.OpARM64LessThanF,
  1055  		ssa.OpARM64LessEqualF,
  1056  		ssa.OpARM64GreaterThanF,
  1057  		ssa.OpARM64GreaterEqualF,
  1058  		ssa.OpARM64NotLessThanF,
  1059  		ssa.OpARM64NotLessEqualF,
  1060  		ssa.OpARM64NotGreaterThanF,
  1061  		ssa.OpARM64NotGreaterEqualF:
  1062  		// generate boolean values using CSET
  1063  		p := s.Prog(arm64.ACSET)
  1064  		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
  1065  		p.From.Reg = condBits[v.Op]
  1066  		p.To.Type = obj.TYPE_REG
  1067  		p.To.Reg = v.Reg()
  1068  	case ssa.OpARM64LoweredGetClosurePtr:
  1069  		// Closure pointer is R26 (arm64.REGCTXT).
  1070  		gc.CheckLoweredGetClosurePtr(v)
  1071  	case ssa.OpARM64LoweredGetCallerSP:
  1072  		// caller's SP is FixedFrameSize below the address of the first arg
  1073  		p := s.Prog(arm64.AMOVD)
  1074  		p.From.Type = obj.TYPE_ADDR
  1075  		p.From.Offset = -gc.Ctxt.FixedFrameSize()
  1076  		p.From.Name = obj.NAME_PARAM
  1077  		p.To.Type = obj.TYPE_REG
  1078  		p.To.Reg = v.Reg()
  1079  	case ssa.OpARM64LoweredGetCallerPC:
  1080  		p := s.Prog(obj.AGETCALLERPC)
  1081  		p.To.Type = obj.TYPE_REG
  1082  		p.To.Reg = v.Reg()
  1083  	case ssa.OpARM64FlagConstant:
  1084  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1085  	case ssa.OpARM64InvertFlags:
  1086  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1087  	case ssa.OpClobber:
  1088  		// TODO: implement for clobberdead experiment. Nop is ok for now.
  1089  	default:
  1090  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1091  	}
  1092  }
  1093  
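         // condBits maps an SSA comparison op to the arm64 condition code used by CSET and CSEL.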
  1094  var condBits = map[ssa.Op]int16{
  1095  	ssa.OpARM64Equal:         arm64.COND_EQ,
  1096  	ssa.OpARM64NotEqual:      arm64.COND_NE,
  1097  	ssa.OpARM64LessThan:      arm64.COND_LT,
  1098  	ssa.OpARM64LessThanU:     arm64.COND_LO,
  1099  	ssa.OpARM64LessEqual:     arm64.COND_LE,
  1100  	ssa.OpARM64LessEqualU:    arm64.COND_LS,
  1101  	ssa.OpARM64GreaterThan:   arm64.COND_GT,
  1102  	ssa.OpARM64GreaterThanU:  arm64.COND_HI,
  1103  	ssa.OpARM64GreaterEqual:  arm64.COND_GE,
  1104  	ssa.OpARM64GreaterEqualU: arm64.COND_HS,
  1105  	ssa.OpARM64LessThanF:     arm64.COND_MI, // Less than
  1106  	ssa.OpARM64LessEqualF:    arm64.COND_LS, // Less than or equal to
  1107  	ssa.OpARM64GreaterThanF:  arm64.COND_GT, // Greater than
  1108  	ssa.OpARM64GreaterEqualF: arm64.COND_GE, // Greater than or equal to
  1109  
   1110  	// The following condition codes include the unordered case, to handle comparisons involving NaN.
  1111  	ssa.OpARM64NotLessThanF:     arm64.COND_PL, // Greater than, equal to, or unordered
  1112  	ssa.OpARM64NotLessEqualF:    arm64.COND_HI, // Greater than or unordered
  1113  	ssa.OpARM64NotGreaterThanF:  arm64.COND_LE, // Less than, equal to or unordered
  1114  	ssa.OpARM64NotGreaterEqualF: arm64.COND_LT, // Less than or unordered
  1115  }
  1116  
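         // blockJump gives, for each conditional block kind, the branch instruction
         // for the condition and its inverse.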
  1117  var blockJump = map[ssa.BlockKind]struct {
  1118  	asm, invasm obj.As
  1119  }{
  1120  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1121  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1122  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1123  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1124  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1125  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1126  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1127  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1128  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1129  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1130  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1131  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1132  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1133  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1134  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1135  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1136  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1137  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1138  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1139  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1140  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1141  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1142  }
  1143  
   1144  // leJumps models an 'LEnoov' ('<=' without overflow checking) branch as a pair of conditional jumps.
  1145  var leJumps = [2][2]gc.IndexJump{
  1146  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1147  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1148  }
  1149  
   1150  // gtJumps models a 'GTnoov' ('>' without overflow checking) branch as a pair of conditional jumps.
  1151  var gtJumps = [2][2]gc.IndexJump{
  1152  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1153  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1154  }
  1155  
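         // ssaGenBlock emits the control-flow instructions that end block b.
         // next is the block that will be laid out immediately after b.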
  1156  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
  1157  	switch b.Kind {
  1158  	case ssa.BlockPlain:
  1159  		if b.Succs[0].Block() != next {
  1160  			p := s.Prog(obj.AJMP)
  1161  			p.To.Type = obj.TYPE_BRANCH
  1162  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1163  		}
  1164  
  1165  	case ssa.BlockDefer:
  1166  		// defer returns in R0:
  1167  		// 0 if we should continue executing
  1168  		// 1 if we should jump to deferreturn call
  1169  		p := s.Prog(arm64.ACMP)
  1170  		p.From.Type = obj.TYPE_CONST
  1171  		p.From.Offset = 0
  1172  		p.Reg = arm64.REG_R0
  1173  		p = s.Prog(arm64.ABNE)
  1174  		p.To.Type = obj.TYPE_BRANCH
  1175  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1176  		if b.Succs[0].Block() != next {
  1177  			p := s.Prog(obj.AJMP)
  1178  			p.To.Type = obj.TYPE_BRANCH
  1179  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1180  		}
  1181  
  1182  	case ssa.BlockExit:
  1183  
  1184  	case ssa.BlockRet:
  1185  		s.Prog(obj.ARET)
  1186  
  1187  	case ssa.BlockRetJmp:
  1188  		p := s.Prog(obj.ARET)
  1189  		p.To.Type = obj.TYPE_MEM
  1190  		p.To.Name = obj.NAME_EXTERN
  1191  		p.To.Sym = b.Aux.(*obj.LSym)
  1192  
  1193  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1194  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1195  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1196  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1197  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1198  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1199  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1200  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1201  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1202  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
  1203  		jmp := blockJump[b.Kind]
  1204  		var p *obj.Prog
  1205  		switch next {
  1206  		case b.Succs[0].Block():
  1207  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1208  		case b.Succs[1].Block():
  1209  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1210  		default:
  1211  			if b.Likely != ssa.BranchUnlikely {
  1212  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1213  				s.Br(obj.AJMP, b.Succs[1].Block())
  1214  			} else {
  1215  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1216  				s.Br(obj.AJMP, b.Succs[0].Block())
  1217  			}
  1218  		}
  1219  		if !b.Controls[0].Type.IsFlags() {
  1220  			p.From.Type = obj.TYPE_REG
  1221  			p.From.Reg = b.Controls[0].Reg()
  1222  		}
  1223  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1224  		jmp := blockJump[b.Kind]
  1225  		var p *obj.Prog
  1226  		switch next {
  1227  		case b.Succs[0].Block():
  1228  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1229  		case b.Succs[1].Block():
  1230  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1231  		default:
  1232  			if b.Likely != ssa.BranchUnlikely {
  1233  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1234  				s.Br(obj.AJMP, b.Succs[1].Block())
  1235  			} else {
  1236  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1237  				s.Br(obj.AJMP, b.Succs[0].Block())
  1238  			}
  1239  		}
  1240  		p.From.Offset = b.AuxInt
  1241  		p.From.Type = obj.TYPE_CONST
  1242  		p.Reg = b.Controls[0].Reg()
  1243  
  1244  	case ssa.BlockARM64LEnoov:
  1245  		s.CombJump(b, next, &leJumps)
  1246  	case ssa.BlockARM64GTnoov:
  1247  		s.CombJump(b, next, &gtJumps)
  1248  	default:
  1249  		b.Fatalf("branch not implemented: %s", b.LongString())
  1250  	}
  1251  }
  1252  
