Source file src/cmd/compile/internal/amd64/ssa.go

Documentation: cmd/compile/internal/amd64

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/gc"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/ssa"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/x86"
    17  )
    18  
    19  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
    20  func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
    21  	flive := b.FlagsLiveAtEnd
    22  	for _, c := range b.ControlValues() {
    23  		flive = c.Type.IsFlags() || flive
    24  	}
    25  	for i := len(b.Values) - 1; i >= 0; i-- {
    26  		v := b.Values[i]
    27  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    28  			// The "mark" is any non-nil Aux value.
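        			// ssaGenValue checks this mark: a zero constant is rewritten to the
        			// flag-clobbering XORL form only when Aux is still nil.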
    29  			v.Aux = v
    30  		}
    31  		if v.Type.IsFlags() {
    32  			flive = false
    33  		}
    34  		for _, a := range v.Args {
    35  			if a.Type.IsFlags() {
    36  				flive = true
    37  			}
    38  		}
    39  	}
    40  }
    41  
    42  // loadByType returns the load instruction of the given type.
    43  func loadByType(t *types.Type) obj.As {
    44  	// Avoid partial register write
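        	// (A plain MOVB/MOVW load leaves the upper bits of the destination
        	// register unchanged, so use a zero-extending load that writes all 32 bits.)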
    45  	if !t.IsFloat() && t.Size() <= 2 {
    46  		if t.Size() == 1 {
    47  			return x86.AMOVBLZX
    48  		} else {
    49  			return x86.AMOVWLZX
    50  		}
    51  	}
    52  	// Otherwise, there's no difference between load and store opcodes.
    53  	return storeByType(t)
    54  }
    55  
    56  // storeByType returns the store instruction of the given type.
    57  func storeByType(t *types.Type) obj.As {
    58  	width := t.Size()
    59  	if t.IsFloat() {
    60  		switch width {
    61  		case 4:
    62  			return x86.AMOVSS
    63  		case 8:
    64  			return x86.AMOVSD
    65  		}
    66  	} else {
    67  		switch width {
    68  		case 1:
    69  			return x86.AMOVB
    70  		case 2:
    71  			return x86.AMOVW
    72  		case 4:
    73  			return x86.AMOVL
    74  		case 8:
    75  			return x86.AMOVQ
    76  		}
    77  	}
    78  	panic("bad store type")
    79  }
    80  
    81  // moveByType returns the reg->reg move instruction of the given type.
    82  func moveByType(t *types.Type) obj.As {
    83  	if t.IsFloat() {
    84  		// Moving the whole sse2 register is faster
    85  		// than moving just the correct low portion of it.
    86  		// There is no xmm->xmm move with 1 byte opcode,
    87  		// so use movups, which has 2 byte opcode.
    88  		return x86.AMOVUPS
    89  	} else {
    90  		switch t.Size() {
    91  		case 1:
    92  			// Avoids partial register write
    93  			return x86.AMOVL
    94  		case 2:
    95  			return x86.AMOVL
    96  		case 4:
    97  			return x86.AMOVL
    98  		case 8:
    99  			return x86.AMOVQ
   100  		case 16:
   101  			return x86.AMOVUPS // int128s are in SSE registers
   102  		default:
   103  			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
   104  		}
   105  	}
   106  }
   107  
   108  // opregreg emits instructions for
   109  //     dest := dest(To) op src(From)
   110  // and also returns the created obj.Prog so it
   111  // may be further adjusted (offset, scale, etc).
   112  func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
   113  	p := s.Prog(op)
   114  	p.From.Type = obj.TYPE_REG
   115  	p.To.Type = obj.TYPE_REG
   116  	p.To.Reg = dest
   117  	p.From.Reg = src
   118  	return p
   119  }
   120  
   121  // memIdx fills out a as an indexed memory reference for v.
   122  // It assumes that the base register and the index register
   123  // are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
   124  // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
   125  func memIdx(a *obj.Addr, v *ssa.Value) {
   126  	r, i := v.Args[0].Reg(), v.Args[1].Reg()
   127  	a.Type = obj.TYPE_MEM
   128  	a.Scale = v.Op.Scale()
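        	// SP cannot be used as an index register, so when the scale is 1
        	// the base and index registers are simply swapped.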
   129  	if a.Scale == 1 && i == x86.REG_SP {
   130  		r, i = i, r
   131  	}
   132  	a.Reg = r
   133  	a.Index = i
   134  }
   135  
   136  // DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
   137  // see runtime/mkduff.go.
   138  func duffStart(size int64) int64 {
   139  	x, _ := duff(size)
   140  	return x
   141  }
   142  func duffAdj(size int64) int64 {
   143  	_, x := duff(size)
   144  	return x
   145  }
   146  
   147  // duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
   148  // required to use the duffzero mechanism for a block of the given size.
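        //
        // For example (a sketch assuming dzClearStep = 16, i.e. one MOVUPS, and
        // dzBlockLen = 4, matching the block layout described above): size = 48
        // is 3 clear steps and 0 whole blocks, so the offset backs the entry
        // point up by dzLeaqSize + 3*dzMovSize bytes, running only the last 3
        // MOVUPSs and the LEAQ of the final block, while the adjustment of
        // -dzClearStep*(4-3) = -16 pre-decrements DI so those MOVUPSs cover
        // bytes 0..47 of the target.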
   149  func duff(size int64) (int64, int64) {
   150  	if size < 32 || size > 1024 || size%dzClearStep != 0 {
   151  		panic("bad duffzero size")
   152  	}
   153  	steps := size / dzClearStep
   154  	blocks := steps / dzBlockLen
   155  	steps %= dzBlockLen
   156  	off := dzBlockSize * (dzBlocks - blocks)
   157  	var adj int64
   158  	if steps != 0 {
   159  		off -= dzLeaqSize
   160  		off -= dzMovSize * steps
   161  		adj -= dzClearStep * (dzBlockLen - steps)
   162  	}
   163  	return off, adj
   164  }
   165  
   166  func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
   167  	switch v.Op {
   168  	case ssa.OpAMD64VFMADD231SD:
   169  		p := s.Prog(v.Op.Asm())
   170  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
   171  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   172  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
   173  		if v.Reg() != v.Args[0].Reg() {
   174  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   175  		}
   176  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   177  		r := v.Reg()
   178  		r1 := v.Args[0].Reg()
   179  		r2 := v.Args[1].Reg()
   180  		switch {
   181  		case r == r1:
   182  			p := s.Prog(v.Op.Asm())
   183  			p.From.Type = obj.TYPE_REG
   184  			p.From.Reg = r2
   185  			p.To.Type = obj.TYPE_REG
   186  			p.To.Reg = r
   187  		case r == r2:
   188  			p := s.Prog(v.Op.Asm())
   189  			p.From.Type = obj.TYPE_REG
   190  			p.From.Reg = r1
   191  			p.To.Type = obj.TYPE_REG
   192  			p.To.Reg = r
   193  		default:
   194  			var asm obj.As
   195  			if v.Op == ssa.OpAMD64ADDQ {
   196  				asm = x86.ALEAQ
   197  			} else {
   198  				asm = x86.ALEAL
   199  			}
   200  			p := s.Prog(asm)
   201  			p.From.Type = obj.TYPE_MEM
   202  			p.From.Reg = r1
   203  			p.From.Scale = 1
   204  			p.From.Index = r2
   205  			p.To.Type = obj.TYPE_REG
   206  			p.To.Reg = r
   207  		}
   208  	// 2-address opcode arithmetic
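        	// (destination and first operand must share a register; the
        	// Fatalf check below enforces this)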
   209  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   210  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   211  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   212  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   213  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   214  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   215  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   216  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   217  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   218  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   219  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   220  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   221  		ssa.OpAMD64PXOR,
   222  		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
   223  		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
   224  		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
   225  		r := v.Reg()
   226  		if r != v.Args[0].Reg() {
   227  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   228  		}
   229  		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
   230  
   231  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   232  		// Arg[0] (the dividend) is in AX.
   233  		// Arg[1] (the divisor) can be in any other register.
   234  		// Result[0] (the quotient) is in AX.
   235  		// Result[1] (the remainder) is in DX.
   236  		r := v.Args[1].Reg()
   237  
   238  		// Zero extend dividend.
   239  		c := s.Prog(x86.AXORL)
   240  		c.From.Type = obj.TYPE_REG
   241  		c.From.Reg = x86.REG_DX
   242  		c.To.Type = obj.TYPE_REG
   243  		c.To.Reg = x86.REG_DX
   244  
   245  		// Issue divide.
   246  		p := s.Prog(v.Op.Asm())
   247  		p.From.Type = obj.TYPE_REG
   248  		p.From.Reg = r
   249  
   250  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   251  		// Arg[0] (the dividend) is in AX.
   252  		// Arg[1] (the divisor) can be in any other register.
   253  		// Result[0] (the quotient) is in AX.
   254  		// Result[1] (the remainder) is in DX.
   255  		r := v.Args[1].Reg()
   256  		var j1 *obj.Prog
   257  
   258  		// CPU faults upon signed overflow, which occurs when the most
   259  		// negative int is divided by -1. Handle divide by -1 as a special case.
   260  		if ssa.DivisionNeedsFixUp(v) {
   261  			var c *obj.Prog
   262  			switch v.Op {
   263  			case ssa.OpAMD64DIVQ:
   264  				c = s.Prog(x86.ACMPQ)
   265  			case ssa.OpAMD64DIVL:
   266  				c = s.Prog(x86.ACMPL)
   267  			case ssa.OpAMD64DIVW:
   268  				c = s.Prog(x86.ACMPW)
   269  			}
   270  			c.From.Type = obj.TYPE_REG
   271  			c.From.Reg = r
   272  			c.To.Type = obj.TYPE_CONST
   273  			c.To.Offset = -1
   274  			j1 = s.Prog(x86.AJEQ)
   275  			j1.To.Type = obj.TYPE_BRANCH
   276  		}
   277  
   278  		// Sign extend dividend.
   279  		switch v.Op {
   280  		case ssa.OpAMD64DIVQ:
   281  			s.Prog(x86.ACQO)
   282  		case ssa.OpAMD64DIVL:
   283  			s.Prog(x86.ACDQ)
   284  		case ssa.OpAMD64DIVW:
   285  			s.Prog(x86.ACWD)
   286  		}
   287  
   288  		// Issue divide.
   289  		p := s.Prog(v.Op.Asm())
   290  		p.From.Type = obj.TYPE_REG
   291  		p.From.Reg = r
   292  
   293  		if j1 != nil {
   294  			// Skip over -1 fixup code.
   295  			j2 := s.Prog(obj.AJMP)
   296  			j2.To.Type = obj.TYPE_BRANCH
   297  
   298  			// Issue -1 fixup code.
   299  			// n / -1 = -n
   300  			var n1 *obj.Prog
   301  			switch v.Op {
   302  			case ssa.OpAMD64DIVQ:
   303  				n1 = s.Prog(x86.ANEGQ)
   304  			case ssa.OpAMD64DIVL:
   305  				n1 = s.Prog(x86.ANEGL)
   306  			case ssa.OpAMD64DIVW:
   307  				n1 = s.Prog(x86.ANEGW)
   308  			}
   309  			n1.To.Type = obj.TYPE_REG
   310  			n1.To.Reg = x86.REG_AX
   311  
   312  			// n % -1 == 0
   313  			n2 := s.Prog(x86.AXORL)
   314  			n2.From.Type = obj.TYPE_REG
   315  			n2.From.Reg = x86.REG_DX
   316  			n2.To.Type = obj.TYPE_REG
   317  			n2.To.Reg = x86.REG_DX
   318  
   319  			// TODO(khr): issue only the -1 fixup code we need.
   320  			// For instance, if only the quotient is used, no point in zeroing the remainder.
   321  
   322  			j1.To.Val = n1
   323  			j2.To.Val = s.Pc()
   324  		}
   325  
   326  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   327  		// The frontend rewrites constant division by 8/16/32-bit integers
   328  		// into HMUL by a constant.
   329  		// SSA rewrites generate the 64-bit versions.
   330  
   331  		// Arg[0] is already in AX as it's the only register we allow
   332  		// and DX is the only output we care about (the high bits)
   333  		p := s.Prog(v.Op.Asm())
   334  		p.From.Type = obj.TYPE_REG
   335  		p.From.Reg = v.Args[1].Reg()
   336  
   337  		// IMULB puts the high portion in AH instead of DL,
   338  		// so move it to DL for consistency
   339  		if v.Type.Size() == 1 {
   340  			m := s.Prog(x86.AMOVB)
   341  			m.From.Type = obj.TYPE_REG
   342  			m.From.Reg = x86.REG_AH
   343  			m.To.Type = obj.TYPE_REG
   344  			m.To.Reg = x86.REG_DX
   345  		}
   346  
   347  	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
   348  		// Arg[0] is already in AX as it's the only register we allow
   349  		// results lo in AX
   350  		p := s.Prog(v.Op.Asm())
   351  		p.From.Type = obj.TYPE_REG
   352  		p.From.Reg = v.Args[1].Reg()
   353  
   354  	case ssa.OpAMD64MULQU2:
   355  		// Arg[0] is already in AX as it's the only register we allow
   356  		// results hi in DX, lo in AX
   357  		p := s.Prog(v.Op.Asm())
   358  		p.From.Type = obj.TYPE_REG
   359  		p.From.Reg = v.Args[1].Reg()
   360  
   361  	case ssa.OpAMD64DIVQU2:
   362  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   363  		// results q in AX, r in DX
   364  		p := s.Prog(v.Op.Asm())
   365  		p.From.Type = obj.TYPE_REG
   366  		p.From.Reg = v.Args[2].Reg()
   367  
   368  	case ssa.OpAMD64AVGQU:
   369  		// compute (x+y)/2 unsigned.
   370  		// Do a 64-bit add, the overflow goes into the carry.
   371  		// Shift right once and pull the carry back into the 63rd bit.
   372  		r := v.Reg()
   373  		if r != v.Args[0].Reg() {
   374  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   375  		}
   376  		p := s.Prog(x86.AADDQ)
   377  		p.From.Type = obj.TYPE_REG
   378  		p.To.Type = obj.TYPE_REG
   379  		p.To.Reg = r
   380  		p.From.Reg = v.Args[1].Reg()
   381  		p = s.Prog(x86.ARCRQ)
   382  		p.From.Type = obj.TYPE_CONST
   383  		p.From.Offset = 1
   384  		p.To.Type = obj.TYPE_REG
   385  		p.To.Reg = r
   386  
   387  	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
   388  		r := v.Reg0()
   389  		r0 := v.Args[0].Reg()
   390  		r1 := v.Args[1].Reg()
   391  		switch r {
   392  		case r0:
   393  			p := s.Prog(v.Op.Asm())
   394  			p.From.Type = obj.TYPE_REG
   395  			p.From.Reg = r1
   396  			p.To.Type = obj.TYPE_REG
   397  			p.To.Reg = r
   398  		case r1:
   399  			p := s.Prog(v.Op.Asm())
   400  			p.From.Type = obj.TYPE_REG
   401  			p.From.Reg = r0
   402  			p.To.Type = obj.TYPE_REG
   403  			p.To.Reg = r
   404  		default:
   405  			v.Fatalf("output not in same register as an input %s", v.LongString())
   406  		}
   407  
   408  	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
   409  		p := s.Prog(v.Op.Asm())
   410  		p.From.Type = obj.TYPE_REG
   411  		p.From.Reg = v.Args[1].Reg()
   412  		p.To.Type = obj.TYPE_REG
   413  		p.To.Reg = v.Reg0()
   414  
   415  	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
   416  		p := s.Prog(v.Op.Asm())
   417  		p.From.Type = obj.TYPE_CONST
   418  		p.From.Offset = v.AuxInt
   419  		p.To.Type = obj.TYPE_REG
   420  		p.To.Reg = v.Reg0()
   421  
   422  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   423  		r := v.Reg()
   424  		a := v.Args[0].Reg()
   425  		if r == a {
   426  			switch v.AuxInt {
   427  			case 1:
   428  				var asm obj.As
   429  				// Software optimization manual recommends add $1,reg.
   430  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   431  				// Clang/GCC choose depending on flags, but prefer add.
   432  				// Experiments show that inc/dec is both a little faster
   433  				// and makes the binary a little smaller.
   434  				if v.Op == ssa.OpAMD64ADDQconst {
   435  					asm = x86.AINCQ
   436  				} else {
   437  					asm = x86.AINCL
   438  				}
   439  				p := s.Prog(asm)
   440  				p.To.Type = obj.TYPE_REG
   441  				p.To.Reg = r
   442  				return
   443  			case -1:
   444  				var asm obj.As
   445  				if v.Op == ssa.OpAMD64ADDQconst {
   446  					asm = x86.ADECQ
   447  				} else {
   448  					asm = x86.ADECL
   449  				}
   450  				p := s.Prog(asm)
   451  				p.To.Type = obj.TYPE_REG
   452  				p.To.Reg = r
   453  				return
   454  			case 0x80:
   455  				// 'SUBQ $-0x80, r' is shorter to encode than
   456  				// and functionally equivalent to 'ADDQ $0x80, r'.
   457  				asm := x86.ASUBL
   458  				if v.Op == ssa.OpAMD64ADDQconst {
   459  					asm = x86.ASUBQ
   460  				}
   461  				p := s.Prog(asm)
   462  				p.From.Type = obj.TYPE_CONST
   463  				p.From.Offset = -0x80
   464  				p.To.Type = obj.TYPE_REG
   465  				p.To.Reg = r
   466  				return
   467  
   468  			}
   469  			p := s.Prog(v.Op.Asm())
   470  			p.From.Type = obj.TYPE_CONST
   471  			p.From.Offset = v.AuxInt
   472  			p.To.Type = obj.TYPE_REG
   473  			p.To.Reg = r
   474  			return
   475  		}
   476  		var asm obj.As
   477  		if v.Op == ssa.OpAMD64ADDQconst {
   478  			asm = x86.ALEAQ
   479  		} else {
   480  			asm = x86.ALEAL
   481  		}
   482  		p := s.Prog(asm)
   483  		p.From.Type = obj.TYPE_MEM
   484  		p.From.Reg = a
   485  		p.From.Offset = v.AuxInt
   486  		p.To.Type = obj.TYPE_REG
   487  		p.To.Reg = r
   488  
   489  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
   490  		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
   491  		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
   492  		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
   493  		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
   494  		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
   495  		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
   496  		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
   497  		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
   498  		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
   499  		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
   500  		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
   501  		r := v.Reg()
   502  		if r != v.Args[0].Reg() {
   503  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   504  		}
   505  		p := s.Prog(v.Op.Asm())
   506  		p.From.Type = obj.TYPE_REG
   507  		p.From.Reg = v.Args[1].Reg()
   508  		p.To.Type = obj.TYPE_REG
   509  		p.To.Reg = r
   510  
   511  	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
   512  		r := v.Reg()
   513  		if r != v.Args[0].Reg() {
   514  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   515  		}
   516  		// Flag condition: ^ZERO || PARITY
   517  		// Generate:
   518  		//   CMOV*NE  SRC,DST
   519  		//   CMOV*PS  SRC,DST
   520  		p := s.Prog(v.Op.Asm())
   521  		p.From.Type = obj.TYPE_REG
   522  		p.From.Reg = v.Args[1].Reg()
   523  		p.To.Type = obj.TYPE_REG
   524  		p.To.Reg = r
   525  		var q *obj.Prog
   526  		if v.Op == ssa.OpAMD64CMOVQNEF {
   527  			q = s.Prog(x86.ACMOVQPS)
   528  		} else if v.Op == ssa.OpAMD64CMOVLNEF {
   529  			q = s.Prog(x86.ACMOVLPS)
   530  		} else {
   531  			q = s.Prog(x86.ACMOVWPS)
   532  		}
   533  		q.From.Type = obj.TYPE_REG
   534  		q.From.Reg = v.Args[1].Reg()
   535  		q.To.Type = obj.TYPE_REG
   536  		q.To.Reg = r
   537  
   538  	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
   539  		r := v.Reg()
   540  		if r != v.Args[0].Reg() {
   541  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   542  		}
   543  
   544  		// Flag condition: ZERO && !PARITY
   545  		// Generate:
   546  		//   MOV      SRC,AX
   547  		//   CMOV*NE  DST,AX
   548  		//   CMOV*PC  AX,DST
   549  		//
   550  		// TODO(rasky): we could generate:
   551  		//   CMOV*NE  DST,SRC
   552  		//   CMOV*PC  SRC,DST
   553  		// But this requires a way for regalloc to know that SRC might be
   554  		// clobbered by this instruction.
   555  		if v.Args[1].Reg() != x86.REG_AX {
   556  			opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
   557  		}
   558  		p := s.Prog(v.Op.Asm())
   559  		p.From.Type = obj.TYPE_REG
   560  		p.From.Reg = r
   561  		p.To.Type = obj.TYPE_REG
   562  		p.To.Reg = x86.REG_AX
   563  		var q *obj.Prog
   564  		if v.Op == ssa.OpAMD64CMOVQEQF {
   565  			q = s.Prog(x86.ACMOVQPC)
   566  		} else if v.Op == ssa.OpAMD64CMOVLEQF {
   567  			q = s.Prog(x86.ACMOVLPC)
   568  		} else {
   569  			q = s.Prog(x86.ACMOVWPC)
   570  		}
   571  		q.From.Type = obj.TYPE_REG
   572  		q.From.Reg = x86.REG_AX
   573  		q.To.Type = obj.TYPE_REG
   574  		q.To.Reg = r
   575  
   576  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   577  		r := v.Reg()
   578  		p := s.Prog(v.Op.Asm())
   579  		p.From.Type = obj.TYPE_CONST
   580  		p.From.Offset = v.AuxInt
   581  		p.To.Type = obj.TYPE_REG
   582  		p.To.Reg = r
   583  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
   584  
   585  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   586  		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
   587  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   588  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   589  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   590  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   591  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   592  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   593  		r := v.Reg()
   594  		if r != v.Args[0].Reg() {
   595  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   596  		}
   597  		p := s.Prog(v.Op.Asm())
   598  		p.From.Type = obj.TYPE_CONST
   599  		p.From.Offset = v.AuxInt
   600  		p.To.Type = obj.TYPE_REG
   601  		p.To.Reg = r
   602  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   603  		r := v.Reg()
   604  		p := s.Prog(v.Op.Asm())
   605  		p.From.Type = obj.TYPE_REG
   606  		p.From.Reg = r
   607  		p.To.Type = obj.TYPE_REG
   608  		p.To.Reg = r
   609  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
   610  		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
   611  		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   612  		p := s.Prog(v.Op.Asm())
   613  		memIdx(&p.From, v)
   614  		o := v.Reg()
   615  		p.To.Type = obj.TYPE_REG
   616  		p.To.Reg = o
   617  		if v.AuxInt != 0 && v.Aux == nil {
   618  			// Emit an additional LEA to add the displacement instead of creating a slow 3 operand LEA.
   619  			switch v.Op {
   620  			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   621  				p = s.Prog(x86.ALEAQ)
   622  			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
   623  				p = s.Prog(x86.ALEAL)
   624  			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   625  				p = s.Prog(x86.ALEAW)
   626  			}
   627  			p.From.Type = obj.TYPE_MEM
   628  			p.From.Reg = o
   629  			p.To.Type = obj.TYPE_REG
   630  			p.To.Reg = o
   631  		}
   632  		gc.AddAux(&p.From, v)
   633  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
   634  		p := s.Prog(v.Op.Asm())
   635  		p.From.Type = obj.TYPE_MEM
   636  		p.From.Reg = v.Args[0].Reg()
   637  		gc.AddAux(&p.From, v)
   638  		p.To.Type = obj.TYPE_REG
   639  		p.To.Reg = v.Reg()
   640  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   641  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   642  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   643  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   644  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   645  		// Go assembler has swapped operands for UCOMISx relative to CMP,
   646  		// must account for that right here.
   647  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   648  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   649  		p := s.Prog(v.Op.Asm())
   650  		p.From.Type = obj.TYPE_REG
   651  		p.From.Reg = v.Args[0].Reg()
   652  		p.To.Type = obj.TYPE_CONST
   653  		p.To.Offset = v.AuxInt
   654  	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
   655  		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   656  		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
   657  		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
   658  		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
   659  		op := v.Op
   660  		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
   661  			// Emit 32-bit version because it's shorter
   662  			op = ssa.OpAMD64BTLconst
   663  		}
   664  		p := s.Prog(op.Asm())
   665  		p.From.Type = obj.TYPE_CONST
   666  		p.From.Offset = v.AuxInt
   667  		p.To.Type = obj.TYPE_REG
   668  		p.To.Reg = v.Args[0].Reg()
   669  	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
   670  		p := s.Prog(v.Op.Asm())
   671  		p.From.Type = obj.TYPE_MEM
   672  		p.From.Reg = v.Args[0].Reg()
   673  		gc.AddAux(&p.From, v)
   674  		p.To.Type = obj.TYPE_REG
   675  		p.To.Reg = v.Args[1].Reg()
   676  	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
   677  		sc := v.AuxValAndOff()
   678  		p := s.Prog(v.Op.Asm())
   679  		p.From.Type = obj.TYPE_MEM
   680  		p.From.Reg = v.Args[0].Reg()
   681  		gc.AddAux2(&p.From, v, sc.Off())
   682  		p.To.Type = obj.TYPE_CONST
   683  		p.To.Offset = sc.Val()
   684  	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
   685  		p := s.Prog(v.Op.Asm())
   686  		memIdx(&p.From, v)
   687  		gc.AddAux(&p.From, v)
   688  		p.To.Type = obj.TYPE_REG
   689  		p.To.Reg = v.Args[2].Reg()
   690  	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
   691  		sc := v.AuxValAndOff()
   692  		p := s.Prog(v.Op.Asm())
   693  		memIdx(&p.From, v)
   694  		gc.AddAux2(&p.From, v, sc.Off())
   695  		p.To.Type = obj.TYPE_CONST
   696  		p.To.Offset = sc.Val()
   697  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   698  		x := v.Reg()
   699  
   700  		// If flags aren't live (indicated by v.Aux == nil),
   701  		// then we can rewrite MOV $0, AX into XOR AX, AX.
   702  		if v.AuxInt == 0 && v.Aux == nil {
   703  			p := s.Prog(x86.AXORL)
   704  			p.From.Type = obj.TYPE_REG
   705  			p.From.Reg = x
   706  			p.To.Type = obj.TYPE_REG
   707  			p.To.Reg = x
   708  			break
   709  		}
   710  
   711  		asm := v.Op.Asm()
   712  		// Use MOVL to move a small constant into a register
   713  		// when the constant is positive and fits into 32 bits.
   714  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   715  			// The upper 32 bits are zeroed automatically when using MOVL.
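        			// (MOVL also has a shorter encoding here than the equivalent MOVQ.)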
   716  			asm = x86.AMOVL
   717  		}
   718  		p := s.Prog(asm)
   719  		p.From.Type = obj.TYPE_CONST
   720  		p.From.Offset = v.AuxInt
   721  		p.To.Type = obj.TYPE_REG
   722  		p.To.Reg = x
   723  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   724  		x := v.Reg()
   725  		p := s.Prog(v.Op.Asm())
   726  		p.From.Type = obj.TYPE_FCONST
   727  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   728  		p.To.Type = obj.TYPE_REG
   729  		p.To.Reg = x
   730  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
   731  		p := s.Prog(v.Op.Asm())
   732  		p.From.Type = obj.TYPE_MEM
   733  		p.From.Reg = v.Args[0].Reg()
   734  		gc.AddAux(&p.From, v)
   735  		p.To.Type = obj.TYPE_REG
   736  		p.To.Reg = v.Reg()
   737  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
   738  		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
   739  		p := s.Prog(v.Op.Asm())
   740  		memIdx(&p.From, v)
   741  		gc.AddAux(&p.From, v)
   742  		p.To.Type = obj.TYPE_REG
   743  		p.To.Reg = v.Reg()
   744  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
   745  		ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
   746  		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
   747  		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
   748  		p := s.Prog(v.Op.Asm())
   749  		p.From.Type = obj.TYPE_REG
   750  		p.From.Reg = v.Args[1].Reg()
   751  		p.To.Type = obj.TYPE_MEM
   752  		p.To.Reg = v.Args[0].Reg()
   753  		gc.AddAux(&p.To, v)
   754  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
   755  		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
   756  		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
   757  		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
   758  		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
   759  		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
   760  		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8:
   761  		p := s.Prog(v.Op.Asm())
   762  		p.From.Type = obj.TYPE_REG
   763  		p.From.Reg = v.Args[2].Reg()
   764  		memIdx(&p.To, v)
   765  		gc.AddAux(&p.To, v)
   766  	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
   767  		sc := v.AuxValAndOff()
   768  		off := sc.Off()
   769  		val := sc.Val()
   770  		if val == 1 || val == -1 {
   771  			var asm obj.As
   772  			if v.Op == ssa.OpAMD64ADDQconstmodify {
   773  				if val == 1 {
   774  					asm = x86.AINCQ
   775  				} else {
   776  					asm = x86.ADECQ
   777  				}
   778  			} else {
   779  				if val == 1 {
   780  					asm = x86.AINCL
   781  				} else {
   782  					asm = x86.ADECL
   783  				}
   784  			}
   785  			p := s.Prog(asm)
   786  			p.To.Type = obj.TYPE_MEM
   787  			p.To.Reg = v.Args[0].Reg()
   788  			gc.AddAux2(&p.To, v, off)
   789  			break
   790  		}
   791  		fallthrough
   792  	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
   793  		ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
   794  		ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
   795  		sc := v.AuxValAndOff()
   796  		off := sc.Off()
   797  		val := sc.Val()
   798  		p := s.Prog(v.Op.Asm())
   799  		p.From.Type = obj.TYPE_CONST
   800  		p.From.Offset = val
   801  		p.To.Type = obj.TYPE_MEM
   802  		p.To.Reg = v.Args[0].Reg()
   803  		gc.AddAux2(&p.To, v, off)
   804  
   805  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   806  		p := s.Prog(v.Op.Asm())
   807  		p.From.Type = obj.TYPE_CONST
   808  		sc := v.AuxValAndOff()
   809  		p.From.Offset = sc.Val()
   810  		p.To.Type = obj.TYPE_MEM
   811  		p.To.Reg = v.Args[0].Reg()
   812  		gc.AddAux2(&p.To, v, sc.Off())
   813  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
   814  		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
   815  		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
   816  		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
   817  		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
   818  		p := s.Prog(v.Op.Asm())
   819  		p.From.Type = obj.TYPE_CONST
   820  		sc := v.AuxValAndOff()
   821  		p.From.Offset = sc.Val()
   822  		switch {
   823  		case p.As == x86.AADDQ && p.From.Offset == 1:
   824  			p.As = x86.AINCQ
   825  			p.From.Type = obj.TYPE_NONE
   826  		case p.As == x86.AADDQ && p.From.Offset == -1:
   827  			p.As = x86.ADECQ
   828  			p.From.Type = obj.TYPE_NONE
   829  		case p.As == x86.AADDL && p.From.Offset == 1:
   830  			p.As = x86.AINCL
   831  			p.From.Type = obj.TYPE_NONE
   832  		case p.As == x86.AADDL && p.From.Offset == -1:
   833  			p.As = x86.ADECL
   834  			p.From.Type = obj.TYPE_NONE
   835  		}
   836  		memIdx(&p.To, v)
   837  		gc.AddAux2(&p.To, v, sc.Off())
   838  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   839  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   840  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
   841  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   842  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   843  		r := v.Reg()
   844  		// Break false dependency on destination register.
   845  		opregreg(s, x86.AXORPS, r, r)
   846  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   847  	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   848  		var p *obj.Prog
   849  		switch v.Op {
   850  		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
   851  			p = s.Prog(x86.AMOVQ)
   852  		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   853  			p = s.Prog(x86.AMOVL)
   854  		}
   855  		p.From.Type = obj.TYPE_REG
   856  		p.From.Reg = v.Args[0].Reg()
   857  		p.To.Type = obj.TYPE_REG
   858  		p.To.Reg = v.Reg()
   859  	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
   860  		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
   861  		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
   862  		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
   863  		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
   864  		p := s.Prog(v.Op.Asm())
   865  		p.From.Type = obj.TYPE_MEM
   866  		p.From.Reg = v.Args[1].Reg()
   867  		gc.AddAux(&p.From, v)
   868  		p.To.Type = obj.TYPE_REG
   869  		p.To.Reg = v.Reg()
   870  		if v.Reg() != v.Args[0].Reg() {
   871  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   872  		}
   873  	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
   874  		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
   875  		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
   876  		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
   877  		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
   878  		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
   879  		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
   880  		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
   881  		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
   882  		p := s.Prog(v.Op.Asm())
   883  
   884  		r, i := v.Args[1].Reg(), v.Args[2].Reg()
   885  		p.From.Type = obj.TYPE_MEM
   886  		p.From.Scale = v.Op.Scale()
   887  		if p.From.Scale == 1 && i == x86.REG_SP {
   888  			r, i = i, r
   889  		}
   890  		p.From.Reg = r
   891  		p.From.Index = i
   892  
   893  		gc.AddAux(&p.From, v)
   894  		p.To.Type = obj.TYPE_REG
   895  		p.To.Reg = v.Reg()
   896  		if v.Reg() != v.Args[0].Reg() {
   897  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   898  		}
   899  	case ssa.OpAMD64DUFFZERO:
   900  		off := duffStart(v.AuxInt)
   901  		adj := duffAdj(v.AuxInt)
   902  		var p *obj.Prog
   903  		if adj != 0 {
   904  			p = s.Prog(x86.ALEAQ)
   905  			p.From.Type = obj.TYPE_MEM
   906  			p.From.Offset = adj
   907  			p.From.Reg = x86.REG_DI
   908  			p.To.Type = obj.TYPE_REG
   909  			p.To.Reg = x86.REG_DI
   910  		}
   911  		p = s.Prog(obj.ADUFFZERO)
   912  		p.To.Type = obj.TYPE_ADDR
   913  		p.To.Sym = gc.Duffzero
   914  		p.To.Offset = off
   915  	case ssa.OpAMD64MOVOconst:
   916  		if v.AuxInt != 0 {
   917  			v.Fatalf("MOVOconst can only do constant=0")
   918  		}
   919  		r := v.Reg()
   920  		opregreg(s, x86.AXORPS, r, r)
   921  	case ssa.OpAMD64DUFFCOPY:
   922  		p := s.Prog(obj.ADUFFCOPY)
   923  		p.To.Type = obj.TYPE_ADDR
   924  		p.To.Sym = gc.Duffcopy
   925  		if v.AuxInt%16 != 0 {
   926  			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
   927  		}
   928  		p.To.Offset = 14 * (64 - v.AuxInt/16)
   929  		// 14 and 64 are magic constants.  14 is the number of bytes to encode:
   930  		//	MOVUPS	(SI), X0
   931  		//	ADDQ	$16, SI
   932  		//	MOVUPS	X0, (DI)
   933  		//	ADDQ	$16, DI
   934  		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
   935  
   936  	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
   937  		if v.Type.IsMemory() {
   938  			return
   939  		}
   940  		x := v.Args[0].Reg()
   941  		y := v.Reg()
   942  		if x != y {
   943  			opregreg(s, moveByType(v.Type), y, x)
   944  		}
   945  	case ssa.OpLoadReg:
   946  		if v.Type.IsFlags() {
   947  			v.Fatalf("load flags not implemented: %v", v.LongString())
   948  			return
   949  		}
   950  		p := s.Prog(loadByType(v.Type))
   951  		gc.AddrAuto(&p.From, v.Args[0])
   952  		p.To.Type = obj.TYPE_REG
   953  		p.To.Reg = v.Reg()
   954  
   955  	case ssa.OpStoreReg:
   956  		if v.Type.IsFlags() {
   957  			v.Fatalf("store flags not implemented: %v", v.LongString())
   958  			return
   959  		}
   960  		p := s.Prog(storeByType(v.Type))
   961  		p.From.Type = obj.TYPE_REG
   962  		p.From.Reg = v.Args[0].Reg()
   963  		gc.AddrAuto(&p.To, v)
   964  	case ssa.OpAMD64LoweredHasCPUFeature:
   965  		p := s.Prog(x86.AMOVBQZX)
   966  		p.From.Type = obj.TYPE_MEM
   967  		gc.AddAux(&p.From, v)
   968  		p.To.Type = obj.TYPE_REG
   969  		p.To.Reg = v.Reg()
   970  	case ssa.OpAMD64LoweredGetClosurePtr:
   971  		// Closure pointer is DX.
   972  		gc.CheckLoweredGetClosurePtr(v)
   973  	case ssa.OpAMD64LoweredGetG:
   974  		r := v.Reg()
   975  		// See the comments in cmd/internal/obj/x86/obj6.go
   976  		// near CanUse1InsnTLS for a detailed explanation of these instructions.
   977  		if x86.CanUse1InsnTLS(gc.Ctxt) {
   978  			// MOVQ (TLS), r
   979  			p := s.Prog(x86.AMOVQ)
   980  			p.From.Type = obj.TYPE_MEM
   981  			p.From.Reg = x86.REG_TLS
   982  			p.To.Type = obj.TYPE_REG
   983  			p.To.Reg = r
   984  		} else {
   985  			// MOVQ TLS, r
   986  			// MOVQ (r)(TLS*1), r
   987  			p := s.Prog(x86.AMOVQ)
   988  			p.From.Type = obj.TYPE_REG
   989  			p.From.Reg = x86.REG_TLS
   990  			p.To.Type = obj.TYPE_REG
   991  			p.To.Reg = r
   992  			q := s.Prog(x86.AMOVQ)
   993  			q.From.Type = obj.TYPE_MEM
   994  			q.From.Reg = r
   995  			q.From.Index = x86.REG_TLS
   996  			q.From.Scale = 1
   997  			q.To.Type = obj.TYPE_REG
   998  			q.To.Reg = r
   999  		}
  1000  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
  1001  		s.Call(v)
  1002  
  1003  	case ssa.OpAMD64LoweredGetCallerPC:
  1004  		p := s.Prog(x86.AMOVQ)
  1005  		p.From.Type = obj.TYPE_MEM
  1006  		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
  1007  		p.From.Name = obj.NAME_PARAM
  1008  		p.To.Type = obj.TYPE_REG
  1009  		p.To.Reg = v.Reg()
  1010  
  1011  	case ssa.OpAMD64LoweredGetCallerSP:
  1012  		// caller's SP is the address of the first arg
  1013  		mov := x86.AMOVQ
  1014  		if gc.Widthptr == 4 {
  1015  			mov = x86.AMOVL
  1016  		}
  1017  		p := s.Prog(mov)
  1018  		p.From.Type = obj.TYPE_ADDR
  1019  		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
  1020  		p.From.Name = obj.NAME_PARAM
  1021  		p.To.Type = obj.TYPE_REG
  1022  		p.To.Reg = v.Reg()
  1023  
  1024  	case ssa.OpAMD64LoweredWB:
  1025  		p := s.Prog(obj.ACALL)
  1026  		p.To.Type = obj.TYPE_MEM
  1027  		p.To.Name = obj.NAME_EXTERN
  1028  		// arg0 is in DI. Set sym to match where regalloc put arg1.
  1029  		p.To.Sym = gc.GCWriteBarrierReg[v.Args[1].Reg()]
  1030  
  1031  	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
  1032  		p := s.Prog(obj.ACALL)
  1033  		p.To.Type = obj.TYPE_MEM
  1034  		p.To.Name = obj.NAME_EXTERN
  1035  		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
  1036  		s.UseArgs(int64(2 * gc.Widthptr)) // space used in callee args area by assembly stubs
  1037  
  1038  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
  1039  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
  1040  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
  1041  		r := v.Reg()
  1042  		if r != v.Args[0].Reg() {
  1043  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
  1044  		}
  1045  		p := s.Prog(v.Op.Asm())
  1046  		p.To.Type = obj.TYPE_REG
  1047  		p.To.Reg = r
  1048  
  1049  	case ssa.OpAMD64NEGLflags:
  1050  		r := v.Reg0()
  1051  		if r != v.Args[0].Reg() {
  1052  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
  1053  		}
  1054  		p := s.Prog(v.Op.Asm())
  1055  		p.To.Type = obj.TYPE_REG
  1056  		p.To.Reg = r
  1057  
  1058  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
  1059  		p := s.Prog(v.Op.Asm())
  1060  		p.From.Type = obj.TYPE_REG
  1061  		p.From.Reg = v.Args[0].Reg()
  1062  		p.To.Type = obj.TYPE_REG
  1063  		switch v.Op {
  1064  		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
  1065  			p.To.Reg = v.Reg0()
  1066  		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
  1067  			p.To.Reg = v.Reg()
  1068  		}
  1069  	case ssa.OpAMD64ROUNDSD:
  1070  		p := s.Prog(v.Op.Asm())
  1071  		val := v.AuxInt
  1072  		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
  1073  		if val != 0 && val != 1 && val != 2 && val != 3 {
  1074  			v.Fatalf("Invalid rounding mode")
  1075  		}
  1076  		p.From.Offset = val
  1077  		p.From.Type = obj.TYPE_CONST
  1078  		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
  1079  		p.To.Type = obj.TYPE_REG
  1080  		p.To.Reg = v.Reg()
  1081  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
  1082  		if v.Args[0].Reg() != v.Reg() {
  1083  			// POPCNT on Intel has a false dependency on the destination register.
  1084  			// Xor register with itself to break the dependency.
  1085  			p := s.Prog(x86.AXORQ)
  1086  			p.From.Type = obj.TYPE_REG
  1087  			p.From.Reg = v.Reg()
  1088  			p.To.Type = obj.TYPE_REG
  1089  			p.To.Reg = v.Reg()
  1090  		}
  1091  		p := s.Prog(v.Op.Asm())
  1092  		p.From.Type = obj.TYPE_REG
  1093  		p.From.Reg = v.Args[0].Reg()
  1094  		p.To.Type = obj.TYPE_REG
  1095  		p.To.Reg = v.Reg()
  1096  
  1097  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
  1098  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
  1099  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
  1100  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
  1101  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
  1102  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
  1103  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
  1104  		ssa.OpAMD64SETO:
  1105  		p := s.Prog(v.Op.Asm())
  1106  		p.To.Type = obj.TYPE_REG
  1107  		p.To.Reg = v.Reg()
  1108  
  1109  	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
  1110  		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
  1111  		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
  1112  		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
  1113  		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
  1114  		p := s.Prog(v.Op.Asm())
  1115  		p.To.Type = obj.TYPE_MEM
  1116  		p.To.Reg = v.Args[0].Reg()
  1117  		gc.AddAux(&p.To, v)
  1118  
  1119  	case ssa.OpAMD64SETNEF:
  1120  		p := s.Prog(v.Op.Asm())
  1121  		p.To.Type = obj.TYPE_REG
  1122  		p.To.Reg = v.Reg()
  1123  		q := s.Prog(x86.ASETPS)
  1124  		q.To.Type = obj.TYPE_REG
  1125  		q.To.Reg = x86.REG_AX
  1126  		// ORL avoids a partial register write and is smaller than ORQ, which the old compiler used.
  1127  		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
  1128  
  1129  	case ssa.OpAMD64SETEQF:
  1130  		p := s.Prog(v.Op.Asm())
  1131  		p.To.Type = obj.TYPE_REG
  1132  		p.To.Reg = v.Reg()
  1133  		q := s.Prog(x86.ASETPC)
  1134  		q.To.Type = obj.TYPE_REG
  1135  		q.To.Reg = x86.REG_AX
  1136  		// ANDL avoids a partial register write and is smaller than ANDQ, which the old compiler used.
  1137  		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
  1138  
  1139  	case ssa.OpAMD64InvertFlags:
  1140  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1141  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
  1142  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1143  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
  1144  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
  1145  	case ssa.OpAMD64REPSTOSQ:
  1146  		s.Prog(x86.AREP)
  1147  		s.Prog(x86.ASTOSQ)
  1148  	case ssa.OpAMD64REPMOVSQ:
  1149  		s.Prog(x86.AREP)
  1150  		s.Prog(x86.AMOVSQ)
  1151  	case ssa.OpAMD64LoweredNilCheck:
  1152  		// Issue a load which will fault if the input is nil.
  1153  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
  1154  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
  1155  		// but it doesn't have false dependency on AX.
  1156  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
  1157  		// That trades clobbering flags for clobbering a register.
  1158  		p := s.Prog(x86.ATESTB)
  1159  		p.From.Type = obj.TYPE_REG
  1160  		p.From.Reg = x86.REG_AX
  1161  		p.To.Type = obj.TYPE_MEM
  1162  		p.To.Reg = v.Args[0].Reg()
  1163  		if logopt.Enabled() {
  1164  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1165  		}
  1166  		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1167  			gc.Warnl(v.Pos, "generated nil check")
  1168  		}
  1169  	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
  1170  		p := s.Prog(v.Op.Asm())
  1171  		p.From.Type = obj.TYPE_MEM
  1172  		p.From.Reg = v.Args[0].Reg()
  1173  		gc.AddAux(&p.From, v)
  1174  		p.To.Type = obj.TYPE_REG
  1175  		p.To.Reg = v.Reg0()
  1176  	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
  1177  		r := v.Reg0()
  1178  		if r != v.Args[0].Reg() {
  1179  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
  1180  		}
  1181  		p := s.Prog(v.Op.Asm())
  1182  		p.From.Type = obj.TYPE_REG
  1183  		p.From.Reg = r
  1184  		p.To.Type = obj.TYPE_MEM
  1185  		p.To.Reg = v.Args[1].Reg()
  1186  		gc.AddAux(&p.To, v)
  1187  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
  1188  		r := v.Reg0()
  1189  		if r != v.Args[0].Reg() {
  1190  			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
  1191  		}
  1192  		s.Prog(x86.ALOCK)
  1193  		p := s.Prog(v.Op.Asm())
  1194  		p.From.Type = obj.TYPE_REG
  1195  		p.From.Reg = r
  1196  		p.To.Type = obj.TYPE_MEM
  1197  		p.To.Reg = v.Args[1].Reg()
  1198  		gc.AddAux(&p.To, v)
  1199  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
  1200  		if v.Args[1].Reg() != x86.REG_AX {
  1201  			v.Fatalf("input[1] not in AX %s", v.LongString())
  1202  		}
  1203  		s.Prog(x86.ALOCK)
  1204  		p := s.Prog(v.Op.Asm())
  1205  		p.From.Type = obj.TYPE_REG
  1206  		p.From.Reg = v.Args[2].Reg()
  1207  		p.To.Type = obj.TYPE_MEM
  1208  		p.To.Reg = v.Args[0].Reg()
  1209  		gc.AddAux(&p.To, v)
  1210  		p = s.Prog(x86.ASETEQ)
  1211  		p.To.Type = obj.TYPE_REG
  1212  		p.To.Reg = v.Reg0()
  1213  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
  1214  		s.Prog(x86.ALOCK)
  1215  		p := s.Prog(v.Op.Asm())
  1216  		p.From.Type = obj.TYPE_REG
  1217  		p.From.Reg = v.Args[1].Reg()
  1218  		p.To.Type = obj.TYPE_MEM
  1219  		p.To.Reg = v.Args[0].Reg()
  1220  		gc.AddAux(&p.To, v)
  1221  	case ssa.OpClobber:
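        		// Poison the slot with 0xdeaddead, written as two 32-bit stores
        		// (the second 4 bytes higher).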
  1222  		p := s.Prog(x86.AMOVL)
  1223  		p.From.Type = obj.TYPE_CONST
  1224  		p.From.Offset = 0xdeaddead
  1225  		p.To.Type = obj.TYPE_MEM
  1226  		p.To.Reg = x86.REG_SP
  1227  		gc.AddAux(&p.To, v)
  1228  		p = s.Prog(x86.AMOVL)
  1229  		p.From.Type = obj.TYPE_CONST
  1230  		p.From.Offset = 0xdeaddead
  1231  		p.To.Type = obj.TYPE_MEM
  1232  		p.To.Reg = x86.REG_SP
  1233  		gc.AddAux(&p.To, v)
  1234  		p.To.Offset += 4
  1235  	default:
  1236  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1237  	}
  1238  }
  1239  
  1240  var blockJump = [...]struct {
  1241  	asm, invasm obj.As
  1242  }{
  1243  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
  1244  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
  1245  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
  1246  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
  1247  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
  1248  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
  1249  	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
  1250  	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
  1251  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
  1252  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
  1253  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
  1254  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
  1255  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
  1256  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
  1257  }
  1258  
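        // eqfJumps and nefJumps describe the two-jump sequences emitted for the
        // floating-point EQF and NEF blocks below: UCOMISS/UCOMISD report an
        // unordered (NaN) comparison through the parity flag, so equality is
        // ZERO && !PARITY and inequality is !ZERO || PARITY, mirroring the
        // CMOV*EQF/CMOV*NEF and SETEQF/SETNEF cases above.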
  1259  var eqfJumps = [2][2]gc.IndexJump{
  1260  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
  1261  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
  1262  }
  1263  var nefJumps = [2][2]gc.IndexJump{
  1264  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
  1265  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
  1266  }
  1267  
  1268  func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
  1269  	switch b.Kind {
  1270  	case ssa.BlockPlain:
  1271  		if b.Succs[0].Block() != next {
  1272  			p := s.Prog(obj.AJMP)
  1273  			p.To.Type = obj.TYPE_BRANCH
  1274  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1275  		}
  1276  	case ssa.BlockDefer:
  1277  		// defer returns in rax:
  1278  		// 0 if we should continue executing
  1279  		// 1 if we should jump to deferreturn call
  1280  		p := s.Prog(x86.ATESTL)
  1281  		p.From.Type = obj.TYPE_REG
  1282  		p.From.Reg = x86.REG_AX
  1283  		p.To.Type = obj.TYPE_REG
  1284  		p.To.Reg = x86.REG_AX
  1285  		p = s.Prog(x86.AJNE)
  1286  		p.To.Type = obj.TYPE_BRANCH
  1287  		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
  1288  		if b.Succs[0].Block() != next {
  1289  			p := s.Prog(obj.AJMP)
  1290  			p.To.Type = obj.TYPE_BRANCH
  1291  			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
  1292  		}
  1293  	case ssa.BlockExit:
  1294  	case ssa.BlockRet:
  1295  		s.Prog(obj.ARET)
  1296  	case ssa.BlockRetJmp:
  1297  		p := s.Prog(obj.ARET)
  1298  		p.To.Type = obj.TYPE_MEM
  1299  		p.To.Name = obj.NAME_EXTERN
  1300  		p.To.Sym = b.Aux.(*obj.LSym)
  1301  
  1302  	case ssa.BlockAMD64EQF:
  1303  		s.CombJump(b, next, &eqfJumps)
  1304  
  1305  	case ssa.BlockAMD64NEF:
  1306  		s.CombJump(b, next, &nefJumps)
  1307  
  1308  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
  1309  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
  1310  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
  1311  		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
  1312  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
  1313  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
  1314  		jmp := blockJump[b.Kind]
  1315  		switch next {
  1316  		case b.Succs[0].Block():
  1317  			s.Br(jmp.invasm, b.Succs[1].Block())
  1318  		case b.Succs[1].Block():
  1319  			s.Br(jmp.asm, b.Succs[0].Block())
  1320  		default:
  1321  			if b.Likely != ssa.BranchUnlikely {
  1322  				s.Br(jmp.asm, b.Succs[0].Block())
  1323  				s.Br(obj.AJMP, b.Succs[1].Block())
  1324  			} else {
  1325  				s.Br(jmp.invasm, b.Succs[1].Block())
  1326  				s.Br(obj.AJMP, b.Succs[0].Block())
  1327  			}
  1328  		}
  1329  
  1330  	default:
  1331  		b.Fatalf("branch not implemented: %s", b.LongString())
  1332  	}
  1333  }
  1334  
