Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/internal/obj/x86/asm6.go

Documentation: cmd/internal/obj/x86

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var ( // Package-level symbol handles; declared without initializers, so both start out nil.
    44  	plan9privates *obj.LSym // NOTE(review): not assigned anywhere in this part of the file; find the setter before relying on it
    45  	deferreturn   *obj.LSym // NOTE(review): presumably the runtime deferreturn symbol; confirm at the assignment site
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // we want to align each loop entry to a loopAlign-byte boundary,
    52  // and are willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  //
    63  const (
    64  	loopAlign  = 16 // alignment boundary, in bytes, wanted for loop entries
    65  	maxLoopPad = 0  // most NOP bytes to insert per loop entry; 0 disables the padding
    66  )
    67  
    68  // Bit flags (values 1, 2 and 4) that are used to express jump target properties.
    69  const (
    70  	// branchBackwards marks targets that are located behind the branch.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota)
    73  	// branchShort marks branches whose target is close,
    74  	// with an offset in the -128..127 range.
    75  	branchShort
    76  	// branchLoopHead marks a loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead
    79  )
    80  
    81  // opBytes holds optab encoding bytes.
    82  // Each ytab line reserves a fixed number of bytes in this array.
    83  //
    84  // The size should be the minimal number of bytes that
    85  // is enough to hold the longest optab op line.
    86  type opBytes [31]uint8
    87  
    88  type Optab struct { // Optab is one instruction's entry in the optab table.
    89  	as     obj.As  // instruction; optab is scanned for the entry whose as equals p.As
    90  	ytab   []ytab  // accepted operand forms, looked through line by line for a match
    91  	prefix uint8   // P* constant that controls which prefix bytes are emitted
    92  	op     opBytes // encoding bytes; each ytab line uses its own run of them, in line order
    93  }
    94  
    95  type movtab struct { // NOTE(review): no user of movtab is visible in this part of the file; the field notes below are guesses to confirm
    96  	as   obj.As
    97  	ft   uint8 // presumably the Ytype of the "from" operand; confirm
    98  	f3t  uint8 // presumably the Ytype of the third operand; confirm
    99  	tt   uint8 // presumably the Ytype of the "to" operand; confirm
   100  	code uint8
   101  	op   [4]uint8
   102  }
   103  
   104  const ( // Ytypes (operand classes): oclass computes an operand's specific Ytype; ycover records the more general Ytypes it also satisfies.
   105  	Yxxx = iota
   106  	Ynone
   107  	Yi0 // $0
   108  	Yi1 // $1
   109  	Yu2 // $x, x fits in uint2
   110  	Yi8 // $x, x fits in int8
   111  	Yu8 // $x, x fits in uint8
   112  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113  	Ys32
   114  	Yi32
   115  	Yi64
   116  	Yiauto
   117  	Yal
   118  	Ycl
   119  	Yax
   120  	Ycx
   121  	Yrb
   122  	Yrl
   123  	Yrl32 // Yrl on 32-bit system
   124  	Yrf
   125  	Yf0
   126  	Yrx
   127  	Ymb
   128  	Yml
   129  	Ym
   130  	Ybr
   131  	Ycs
   132  	Yss
   133  	Yds
   134  	Yes
   135  	Yfs
   136  	Ygs
   137  	Ygdtr
   138  	Yidtr
   139  	Yldtr
   140  	Ymsw
   141  	Ytask
   142  	Ycr0
   143  	Ycr1
   144  	Ycr2
   145  	Ycr3
   146  	Ycr4
   147  	Ycr5
   148  	Ycr6
   149  	Ycr7
   150  	Ycr8
   151  	Ydr0
   152  	Ydr1
   153  	Ydr2
   154  	Ydr3
   155  	Ydr4
   156  	Ydr5
   157  	Ydr6
   158  	Ydr7
   159  	Ytr0
   160  	Ytr1
   161  	Ytr2
   162  	Ytr3
   163  	Ytr4
   164  	Ytr5
   165  	Ytr6
   166  	Ytr7
   167  	Ymr
   168  	Ymm
   169  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171  	Yxr           // X0..X15
   172  	YxrEvex       // X0..X31
   173  	Yxm
   174  	YxmEvex       // YxrEvex+Ym
   175  	Yxvm          // VSIB vector array; vm32x/vm64x
   176  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   177  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178  	Yyr           // Y0..Y15
   179  	YyrEvex       // Y0..Y31
   180  	Yym
   181  	YymEvex   // YyrEvex+Ym
   182  	Yyvm      // VSIB vector array; vm32y/vm64y
   183  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource Yzr
   185  	Yzr       // Z0..Z31
   186  	Yzm       // Yzr+Ym
   187  	Yzvm      // VSIB vector array; vm32z/vm64z
   188  	Yk0       // K0
   189  	Yknot0    // K1..K7; write mask
   190  	Yk        // K0..K7; used for KOP
   191  	Ykm       // Yk+Ym; used for KOP
   192  	Ytls
   193  	Ytextsize
   194  	Yindir
   195  	Ymax // number of Ytypes; ycover is a Ymax*Ymax table
   196  )
   197  
   198  const ( // Ztypes: each ytab line names one, and the switch on yt.zcase in doasm lays down the instruction bytes accordingly.
   199  	Zxxx = iota
   200  	Zlit
   201  	Zlitm_r
   202  	Zlitr_m
   203  	Zlit_m_r
   204  	Z_rp
   205  	Zbr
   206  	Zcall
   207  	Zcallcon
   208  	Zcallduff
   209  	Zcallind
   210  	Zcallindreg
   211  	Zib_
   212  	Zib_rp
   213  	Zibo_m // opcode byte, then the encoded addressing mode of the operand, then a single immediate byte
   214  	Zibo_m_xm
   215  	Zil_
   216  	Zil_rp
   217  	Ziq_rp
   218  	Zilo_m // same as Zibo_m but with a long (32-bit) immediate
   219  	Zjmp
   220  	Zjmpcon
   221  	Zloop
   222  	Zo_iw
   223  	Zm_o
   224  	Zm_r
   225  	Z_m_r
   226  	Zm2_r
   227  	Zm_r_xm
   228  	Zm_r_i_xm
   229  	Zm_r_xm_nr
   230  	Zr_m_xm_nr
   231  	Zibm_r // mmx1,mmx2/mem64,imm8
   232  	Zibr_m
   233  	Zmb_r
   234  	Zaut_r
   235  	Zo_m
   236  	Zo_m64
   237  	Zpseudo
   238  	Zr_m
   239  	Zr_m_xm
   240  	Zrp_
   241  	Z_ib
   242  	Z_il
   243  	Zm_ibo
   244  	Zm_ilo
   245  	Zib_rr
   246  	Zil_rr
   247  	Zbyte
   248  
   249  	Zvex_rm_v_r
   250  	Zvex_rm_v_ro
   251  	Zvex_r_v_rm
   252  	Zvex_i_rm_vo
   253  	Zvex_v_rm_r
   254  	Zvex_i_rm_r
   255  	Zvex_i_r_v
   256  	Zvex_i_rm_v_r
   257  	Zvex
   258  	Zvex_rm_r_vo
   259  	Zvex_i_r_rm
   260  	Zvex_hr_rm_v_r
   261  
   262  	Zevex_first // sentinel: every Zevex_* case is declared between Zevex_first and Zevex_last
   263  	Zevex_i_r_k_rm
   264  	Zevex_i_r_rm
   265  	Zevex_i_rm_k_r
   266  	Zevex_i_rm_k_vo
   267  	Zevex_i_rm_r
   268  	Zevex_i_rm_v_k_r
   269  	Zevex_i_rm_v_r
   270  	Zevex_i_rm_vo
   271  	Zevex_k_rmo
   272  	Zevex_r_k_rm
   273  	Zevex_r_v_k_rm
   274  	Zevex_r_v_rm
   275  	Zevex_rm_k_r
   276  	Zevex_rm_v_k_r
   277  	Zevex_rm_v_r
   278  	Zevex_last // sentinel: closes the Zevex_* range opened by Zevex_first
   279  
   280  	Zmax // number of Ztypes
   281  )
   282  
   283  const ( // P*: prefix selectors stored in Optab.prefix (a few also lead an opBytes list, e.g. ACMPPD). Rx*: bit masks named after the REX fields.
   284  	Px   = 0
   285  	Px1  = 1    // symbolic; exact value doesn't matter
   286  	P32  = 0x32 // 32-bit only
   287  	Pe   = 0x66 // operand escape
   288  	Pm   = 0x0f // 2byte opcode escape
   289  	Pq   = 0xff // both escapes: 66 0f
   290  	Pb   = 0xfe // byte operands
   291  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   292  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   293  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296  	Pq4w = 0x69 // Pq4 with Rex.w: 66 0F 38
   297  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298  	Pq5w = 0x6b // Pq5 with Rex.w: F3 0F 38
   299  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300  	Pw   = 0x48 // Rex.w
   301  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   302  	Py   = 0x80 // defaults to 64-bit mode
   303  	Py1  = 0x81 // symbolic; exact value doesn't matter
   304  	Py3  = 0x83 // symbolic; exact value doesn't matter
   305  	Pavx = 0x84 // symbolic; exact value doesn't matter
   306  
   307  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308  	Rxw     = 1 << 3 // =1, 64-bit operand size
   309  	Rxr     = 1 << 2 // extend modrm reg
   310  	Rxx     = 1 << 1 // extend sib index
   311  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312  )
   313  
   314  const (
   315  	// Encoding for VEX prefix in tables.
   316  	// The P, L, and W fields are chosen to match
   317  	// their eventual locations in the VEX prefix bytes.
   318  
   319  	// Bit layout of the values below: P is bits 0-1, L is bit 2,
   320  	// M is bits 3-4, avxEscape is bit 6 and W is bit 7, so one
   321  	// value from each field can be OR-ed together without overlap.
   322  
   323  	// Using spare bit to make leading [E]VEX encoding byte different from
   324  	// 0x0f even if all other VEX fields are 0.
   325  	avxEscape = 1 << 6
   326  
   327  	// P field - 2 bits
   328  	vex66 = 1 << 0
   329  	vexF3 = 2 << 0
   330  	vexF2 = 3 << 0
   331  	// L field - 1 bit
   332  	vexLZ  = 0 << 2
   333  	vexLIG = 0 << 2
   334  	vex128 = 0 << 2
   335  	vex256 = 1 << 2
   336  	// W field - 1 bit
   337  	vexWIG = 0 << 7
   338  	vexW0  = 0 << 7
   339  	vexW1  = 1 << 7
   340  	// M field - 5 bits, but mostly reserved; we can store up to 3
   341  	vex0F   = 1 << 3
   342  	vex0F38 = 2 << 3
   343  	vex0F3A = 3 << 3
   344  )
   345  
   346  var ycover [Ymax * Ymax]uint8 // ycover[from*Ymax+to] = 1 means an operand of Ytype from also counts as Ytype to; set up in instinit
   347    
   348  var reg [MAXREG]int // NOTE(review): sized by MAXREG, so presumably indexed by register number; confirm what the values encode
   349    
   350  var regrex [MAXREG + 1]int // NOTE(review): one slot more than reg; presumably per-register REX bits; confirm
   351  
   352  var ynone = []ytab{ // no operands: a single Zlit line that uses one op byte
   353  	{Zlit, 1, argList{}},
   354  }
   355    
   356  var ytext = []ytab{ // Zpseudo forms with two or three operands, the last being Ytextsize
   357  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359  }
   360  
   361  var ynop = []ytab{ // NOTE(review): rows 6-9 repeat the operand lists of rows 2-5; only the last row's byte count differs
   362  	{Zpseudo, 0, argList{}},
   363  	{Zpseudo, 0, argList{Yiauto}},
   364  	{Zpseudo, 0, argList{Yml}},
   365  	{Zpseudo, 0, argList{Yrf}},
   366  	{Zpseudo, 0, argList{Yxr}},
   367  	{Zpseudo, 0, argList{Yiauto}},
   368  	{Zpseudo, 0, argList{Yml}},
   369  	{Zpseudo, 0, argList{Yrf}},
   370  	{Zpseudo, 1, argList{Yxr}},
   371  }
   372  
   373  var yfuncdata = []ytab{ // Zpseudo form: constant, then memory operand
   374  	{Zpseudo, 0, argList{Yi32, Ym}},
   375  }
   376  
   377  var ypcdata = []ytab{ // Zpseudo form: two constants
   378  	{Zpseudo, 0, argList{Yi32, Yi32}},
   379  }
   380  
   381  var yxorb = []ytab{ // used by the byte-sized ops in optab (AADCB, AADDB, AANDB)
   382  	{Zib_, 1, argList{Yi32, Yal}},
   383  	{Zibo_m, 2, argList{Yi32, Ymb}},
   384  	{Zr_m, 1, argList{Yrb, Ymb}},
   385  	{Zm_r, 1, argList{Ymb, Yrb}},
   386  }
   387  
   388  var yaddl = []ytab{ // worked through for AADDL in the comment above optab
   389  	{Zibo_m, 2, argList{Yi8, Yml}},
   390  	{Zil_, 1, argList{Yi32, Yax}},
   391  	{Zilo_m, 2, argList{Yi32, Yml}},
   392  	{Zr_m, 1, argList{Yrl, Yml}},
   393  	{Zm_r, 1, argList{Yml, Yrl}},
   394  }
   395  
   396  var yincl = []ytab{
   397  	{Z_rp, 1, argList{Yrl}},
   398  	{Zo_m, 2, argList{Yml}},
   399  }
   400  
   401  var yincq = []ytab{ // yincl without the Z_rp register row
   402  	{Zo_m, 2, argList{Yml}},
   403  }
   404  
   405  var ycmpb = []ytab{ // same shapes as yxorb with the two operands swapped
   406  	{Z_ib, 1, argList{Yal, Yi32}},
   407  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   408  	{Zm_r, 1, argList{Ymb, Yrb}},
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  }
   411  
   412  var ycmpl = []ytab{ // same shapes as yaddl with the two operands swapped
   413  	{Zm_ibo, 2, argList{Yml, Yi8}},
   414  	{Z_il, 1, argList{Yax, Yi32}},
   415  	{Zm_ilo, 2, argList{Yml, Yi32}},
   416  	{Zm_r, 1, argList{Yml, Yrl}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
   420  var yshb = []ytab{ // NOTE(review): unlike yshl there is no Ycl row here; presumably ycover lets Ycl match Ycx; confirm in instinit
   421  	{Zo_m, 2, argList{Yi1, Ymb}},
   422  	{Zibo_m, 2, argList{Yu8, Ymb}},
   423  	{Zo_m, 2, argList{Ycx, Ymb}},
   424  }
   425  
   426  var yshl = []ytab{
   427  	{Zo_m, 2, argList{Yi1, Yml}},
   428  	{Zibo_m, 2, argList{Yu8, Yml}},
   429  	{Zo_m, 2, argList{Ycl, Yml}},
   430  	{Zo_m, 2, argList{Ycx, Yml}},
   431  }
   432  
   433  var ytestl = []ytab{ // yaddl without its leading Yi8 row
   434  	{Zil_, 1, argList{Yi32, Yax}},
   435  	{Zilo_m, 2, argList{Yi32, Yml}},
   436  	{Zr_m, 1, argList{Yrl, Yml}},
   437  	{Zm_r, 1, argList{Yml, Yrl}},
   438  }
   439  
   440  var ymovb = []ytab{
   441  	{Zr_m, 1, argList{Yrb, Ymb}},
   442  	{Zm_r, 1, argList{Ymb, Yrb}},
   443  	{Zib_rp, 1, argList{Yi32, Yrb}},
   444  	{Zibo_m, 2, argList{Yi32, Ymb}},
   445  }
   446  
   447  var ybtl = []ytab{ // immediate row uses two op bytes, register row one (see the ABT* entries in optab)
   448  	{Zibo_m, 2, argList{Yi8, Yml}},
   449  	{Zr_m, 1, argList{Yrl, Yml}},
   450  }
   451  
   452  var ymovw = []ytab{
   453  	{Zr_m, 1, argList{Yrl, Yml}},
   454  	{Zm_r, 1, argList{Yml, Yrl}},
   455  	{Zil_rp, 1, argList{Yi32, Yrl}},
   456  	{Zilo_m, 2, argList{Yi32, Yml}},
   457  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   458  }
   459  
   460  var ymovl = []ytab{ // the ymovw rows with the MMX/XMM MOVD rows inserted before the final Zaut_r row
   461  	{Zr_m, 1, argList{Yrl, Yml}},
   462  	{Zm_r, 1, argList{Yml, Yrl}},
   463  	{Zil_rp, 1, argList{Yi32, Yrl}},
   464  	{Zilo_m, 2, argList{Yi32, Yml}},
   465  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   470  }
   471  
   472  var yret = []ytab{ // with no operand or with one constant operand
   473  	{Zo_iw, 1, argList{}},
   474  	{Zo_iw, 1, argList{Yi32}},
   475  }
   476  
   477  var ymovq = []ytab{
   478  	// valid in 32-bit mode
   479  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484  
   485  	// valid only in 64-bit mode, usually with 64-bit prefix
   486  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496  }
   497  
   498  var ymovbe = []ytab{ // each row uses three op bytes
   499  	{Zlitm_r, 3, argList{Ym, Yrl}},
   500  	{Zlitr_m, 3, argList{Yrl, Ym}},
   501  }
   502  
   503  var ym_rl = []ytab{
   504  	{Zm_r, 1, argList{Ym, Yrl}},
   505  }
   506  
   507  var yrl_m = []ytab{
   508  	{Zr_m, 1, argList{Yrl, Ym}},
   509  }
   510  
   511  var ymb_rl = []ytab{
   512  	{Zmb_r, 1, argList{Ymb, Yrl}},
   513  }
   514  
   515  var yml_rl = []ytab{ // used by many optab entries, e.g. the ACMOV*, ABSF* and ABSR* families
   516  	{Zm_r, 1, argList{Yml, Yrl}},
   517  }
   518  
   519  var yrl_ml = []ytab{
   520  	{Zr_m, 1, argList{Yrl, Yml}},
   521  }
   522  
   523  var yml_mb = []ytab{ // the yrb_mb row plus its operand-swapped counterpart
   524  	{Zr_m, 1, argList{Yrb, Ymb}},
   525  	{Zm_r, 1, argList{Ymb, Yrb}},
   526  }
   527  
   528  var yrb_mb = []ytab{
   529  	{Zr_m, 1, argList{Yrb, Ymb}},
   530  }
   531  
   532  var yxchg = []ytab{
   533  	{Z_rp, 1, argList{Yax, Yrl}},
   534  	{Zrp_, 1, argList{Yrl, Yax}},
   535  	{Zr_m, 1, argList{Yrl, Yml}},
   536  	{Zm_r, 1, argList{Yml, Yrl}},
   537  }
   538  
   539  var ydivl = []ytab{
   540  	{Zm_o, 2, argList{Yml}},
   541  }
   542  
   543  var ydivb = []ytab{ // ydivl with Ymb in place of Yml
   544  	{Zm_o, 2, argList{Ymb}},
   545  }
   546  
   547  var yimul = []ytab{
   548  	{Zm_o, 2, argList{Yml}},
   549  	{Zib_rr, 1, argList{Yi8, Yrl}},
   550  	{Zil_rr, 1, argList{Yi32, Yrl}},
   551  	{Zm_r, 2, argList{Yml, Yrl}},
   552  }
   553  
   554  var yimul3 = []ytab{ // three-operand rows: constant (Yi8 or Yi32), Yml, Yrl
   555  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557  }
   558  
   559  var ybyte = []ytab{
   560  	{Zbyte, 1, argList{Yi64}},
   561  }
   562  
   563  var yin = []ytab{ // with one constant operand (Zib_) or with none (Zlit)
   564  	{Zib_, 1, argList{Yi32}},
   565  	{Zlit, 1, argList{}},
   566  }
   567  
   568  var yint = []ytab{
   569  	{Zib_, 1, argList{Yi32}},
   570  }
   571  
   572  var ypushl = []ytab{
   573  	{Zrp_, 1, argList{Yrl}},
   574  	{Zm_o, 2, argList{Ym}},
   575  	{Zib_, 1, argList{Yi8}},
   576  	{Zil_, 1, argList{Yi32}},
   577  }
   578  
   579  var ypopl = []ytab{
   580  	{Z_rp, 1, argList{Yrl}},
   581  	{Zo_m, 2, argList{Ym}},
   582  }
   583  
   584  var ywrfsbase = []ytab{
   585  	{Zm_o, 2, argList{Yrl}},
   586  }
   587  
   588  var yrdrand = []ytab{
   589  	{Zo_m, 2, argList{Yrl}},
   590  }
   591  
   592  var yclflush = []ytab{ // single memory operand; used by ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT and ACLWB in optab
   593  	{Zo_m, 2, argList{Ym}},
   594  }
   595  
   596  var ybswap = []ytab{
   597  	{Z_rp, 2, argList{Yrl}},
   598  }
   599  
   600  var yscond = []ytab{
   601  	{Zo_m, 2, argList{Ymb}},
   602  }
   603  
   604  var yjcond = []ytab{ // branch target alone, or preceded by a Yi0 or Yi1 operand
   605  	{Zbr, 0, argList{Ybr}},
   606  	{Zbr, 0, argList{Yi0, Ybr}},
   607  	{Zbr, 1, argList{Yi1, Ybr}},
   608  }
   609  
   610  var yloop = []ytab{
   611  	{Zloop, 1, argList{Ybr}},
   612  }
   613  
   614  var ycall = []ytab{ // rows line up with obj.ACALL's opBytes{0xff, 02, 0xff, 0x15, 0xe8} in optab
   615  	{Zcallindreg, 0, argList{Yml}},
   616  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   617  	{Zcallind, 2, argList{Yindir}},
   618  	{Zcall, 0, argList{Ybr}},
   619  	{Zcallcon, 1, argList{Yi32}},
   620  }
   621  
   622  var yduff = []ytab{
   623  	{Zcallduff, 1, argList{Yi32}},
   624  }
   625  
   626  var yjmp = []ytab{
   627  	{Zo_m64, 2, argList{Yml}},
   628  	{Zjmp, 0, argList{Ybr}},
   629  	{Zjmpcon, 1, argList{Yi32}},
   630  }
   631  
   632  var yfmvd = []ytab{ // every row pairs Yf0 with either a memory operand (Ym) or Yrf
   633  	{Zm_o, 2, argList{Ym, Yf0}},
   634  	{Zo_m, 2, argList{Yf0, Ym}},
   635  	{Zm_o, 2, argList{Yrf, Yf0}},
   636  	{Zo_m, 2, argList{Yf0, Yrf}},
   637  }
   638  
   639  var yfmvdp = []ytab{ // the two Zo_m rows of yfmvd
   640  	{Zo_m, 2, argList{Yf0, Ym}},
   641  	{Zo_m, 2, argList{Yf0, Yrf}},
   642  }
   643  
   644  var yfmvf = []ytab{ // the two Ym rows of yfmvd
   645  	{Zm_o, 2, argList{Ym, Yf0}},
   646  	{Zo_m, 2, argList{Yf0, Ym}},
   647  }
   648  
   649  var yfmvx = []ytab{
   650  	{Zm_o, 2, argList{Ym, Yf0}},
   651  }
   652  
   653  var yfmvp = []ytab{
   654  	{Zo_m, 2, argList{Yf0, Ym}},
   655  }
   656  
   657  var yfcmv = []ytab{
   658  	{Zm_o, 2, argList{Yrf, Yf0}},
   659  }
   660  
   661  var yfadd = []ytab{
   662  	{Zm_o, 2, argList{Ym, Yf0}},
   663  	{Zm_o, 2, argList{Yrf, Yf0}},
   664  	{Zo_m, 2, argList{Yf0, Yrf}},
   665  }
   666  
   667  var yfxch = []ytab{
   668  	{Zo_m, 2, argList{Yf0, Yrf}},
   669  	{Zm_o, 2, argList{Yrf, Yf0}},
   670  }
   671  
   672  var ycompp = []ytab{
   673  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674  }
   675  
   676  var ystsw = []ytab{
   677  	{Zo_m, 2, argList{Ym}},
   678  	{Zlit, 1, argList{Yax}},
   679  }
   680  
   681  var ysvrs_mo = []ytab{ // single memory operand, Zm_o form
   682  	{Zm_o, 2, argList{Ym}},
   683  }
   684  
   685  // unaryDst version of "ysvrs_mo": same operand, Zo_m form.
   686  var ysvrs_om = []ytab{
   687  	{Zo_m, 2, argList{Ym}},
   688  }
   689  
   690  var ymm = []ytab{ // Ymm/Ymr row uses one op byte, Yxm/Yxr row uses two
   691  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693  }
   694  
   695  var yxm = []ytab{ // used by the packed/scalar XMM ops in optab, e.g. AADDPD, AADDPS, AADDSD, AADDSS
   696  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697  }
   698  
   699  var yxm_q4 = []ytab{ // same operands as yxm, but Zm_r instead of Zm_r_xm
   700  	{Zm_r, 1, argList{Yxm, Yxr}},
   701  }
   702  
   703  var yxcvm1 = []ytab{
   704  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706  }
   707  
   708  var yxcvm2 = []ytab{
   709  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711  }
   712  
   713  var yxr = []ytab{
   714  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715  }
   716  
   717  var yxr_ml = []ytab{
   718  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   719  }
   720  
   721  var ymr = []ytab{
   722  	{Zm_r, 1, argList{Ymr, Ymr}},
   723  }
   724  
   725  var ymr_ml = []ytab{
   726  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   727  }
   728  
   729  var yxcmpi = []ytab{ // uses two op bytes; optab's ACMPPD and ACMPSD entries put a P* constant in the first
   730  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731  }
   732  
   733  var yxmov = []ytab{
   734  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736  }
   737  
   738  var yxcvfl = []ytab{
   739  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740  }
   741  
   742  var yxcvlf = []ytab{
   743  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   744  }
   745  
   746  var yxcvfq = []ytab{ // yxcvfl with two op bytes instead of one
   747  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748  }
   749  
   750  var yxcvqf = []ytab{ // yxcvlf with two op bytes instead of one
   751  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   752  }
   753  
   754  var yps = []ytab{
   755  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759  }
   760  
   761  var yxrrl = []ytab{
   762  	{Zm_r, 1, argList{Yxr, Yrl}},
   763  }
   764  
   765  var ymrxr = []ytab{
   766  	{Zm_r, 1, argList{Ymr, Yxr}},
   767  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768  }
   769  
   770  var ymshuf = []ytab{
   771  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772  }
   773  
   774  var ymshufb = []ytab{
   775  	{Zm2_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  // It should never have more than 1 entry,
   779  // because some optab entries use opcode sequences that
   780  // are longer than 2 bytes (zoffset=2 here),
   781  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   782  // to name a few.
   783  var yxshuf = []ytab{
   784  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785  }
   786  
   787  var yextrw = []ytab{
   788  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790  }
   791  
   792  var yextr = []ytab{
   793  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794  }
   795  
   796  var yinsrw = []ytab{
   797  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798  }
   799  
   800  var yinsr = []ytab{
   801  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802  }
   803  
   804  var ypsdq = []ytab{
   805  	{Zibo_m, 2, argList{Yi8, Yxr}},
   806  }
   807  
   808  var ymskb = []ytab{ // XMM-register row uses two op bytes, Ymr row uses one
   809  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811  }
   812  
   813  var ycrc32l = []ytab{
   814  	{Zlitm_r, 0, argList{Yml, Yrl}},
   815  }
   816  
   817  var ycrc32b = []ytab{ // ycrc32l with Ymb in place of Yml
   818  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   819  }
   820  
   821  var yprefetch = []ytab{
   822  	{Zm_o, 2, argList{Ym}},
   823  }
   824  
   825  var yaes = []ytab{
   826  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   827  }
   828  
   829  var yxbegin = []ytab{
   830  	{Zjmp, 1, argList{Ybr}},
   831  }
   832  
   833  var yxabort = []ytab{
   834  	{Zib_, 1, argList{Yu8}},
   835  }
   836  
   837  var ylddqu = []ytab{
   838  	{Zm_r, 1, argList{Ym, Yxr}},
   839  }
   840  
   841  var ypalignr = []ytab{ // same single row as yxshuf
   842  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843  }
   844  
   845  var ysha256rnds2 = []ytab{ // first operand is Yxr0, i.e. X0 only
   846  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847  }
   848  
   849  var yblendvpd = []ytab{ // first operand is Yxr0, i.e. X0 only
   850  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851  }
   852  
   853  var ymmxmm0f38 = []ytab{ // Ymm/Ymr row uses three op bytes, Yxm/Yxr row uses five
   854  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   855  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   856  }
   857  
   858  var yextractps = []ytab{
   859  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860  }
   861  
   862  var ysha1rnds4 = []ytab{
   863  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864  }
   865  
   866  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868  // to find the entry with the given p.As and then looks through the ytable for
   869  // that instruction (the second field in the optab struct) for a line whose
   870  // argList matches the Ytypes of the operands (p.From, p.To, ...).  The
   871  // function oclass computes the specific Ytype of an operand and then the set
   872  // of more general Ytypes that it satisfies is implied by the ycover table, set
   873  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874  // from the more general 8-bit constants, but instinit says
   875  //
   876  //        ycover[Yi0*Ymax+Ys32] = 1
   877  //        ycover[Yi1*Ymax+Ys32] = 1
   878  //        ycover[Yi8*Ymax+Ys32] = 1
   879  //
   880  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881  // if that's what an instruction can handle.
   882  //
   883  // In parallel with the scan through the ytable for the appropriate line, there
   884  // is a z pointer that starts out pointing at the strange magic byte list in
   885  // the Optab struct.  With each step past a non-matching ytable line, z
   886  // advances by the 2nd entry in the line.  When a matching line is found, that
   887  // z pointer has the extra data to use in laying down the instruction bytes.
   888  // The actual bytes laid down are a function of the 1st entry in the line (that
   889  // is, the Ztype) and the z bytes.
   890  //
   891  // For example, let's look at AADDL.  The optab line says:
   892  //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //        var yaddl = []ytab{
   896  //                {Zibo_m, 2, argList{Yi8, Yml}},
   897  //                {Zil_, 1, argList{Yi32, Yax}},
   898  //                {Zilo_m, 2, argList{Yi32, Yml}},
   899  //                {Zr_m, 1, argList{Yrl, Yml}},
   900  //                {Zm_r, 1, argList{Yml, Yrl}},
   901  //        }
   902  //
   903  // so there are 5 possible types of ADDL instruction that can be laid down, and
   904  // possible states used to lay them down (Ztype and z pointer, assuming z
   905  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   906  //
   907  //        Yi8, Yml -> Zibo_m, z (0x83, 00)
   908  //        Yi32, Yax -> Zil_, z+2 (0x05)
   909  //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   910  //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   911  //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   912  //
   913  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   914  // relatively straightforward as this program goes.
   915  //
   916  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   917  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   918  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   919  // Zilo_m is the same but a long (32-bit) immediate.
   920  var optab =
   921  //	as, ytab, andproto, opcode
   922  [...]Optab{
   923  	{obj.AXXX, nil, 0, opBytes{}},
   924  	{AAAA, ynone, P32, opBytes{0x37}},
   925  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   926  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   927  	{AAAS, ynone, P32, opBytes{0x3f}},
   928  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   929  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   930  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   933  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   934  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   935  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   936  	{AADDPD, yxm, Pq, opBytes{0x58}},
   937  	{AADDPS, yxm, Pm, opBytes{0x58}},
   938  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   939  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   940  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   941  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   942  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   943  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   944  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   945  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   946  	{AADJSP, nil, 0, opBytes{}},
   947  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   948  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   949  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   950  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   951  	{AANDPD, yxm, Pq, opBytes{0x54}},
   952  	{AANDPS, yxm, Pm, opBytes{0x54}},
   953  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   954  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   956  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   957  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   958  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   959  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   960  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   961  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   962  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   963  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   964  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   965  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   966  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   967  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   968  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   969  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   970  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   971  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   972  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   973  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   974  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   975  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   976  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   977  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   978  	{ABYTE, ybyte, Px, opBytes{1}},
   979  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   980  	{ACBW, ynone, Pe, opBytes{0x98}},
   981  	{ACDQ, ynone, Px, opBytes{0x99}},
   982  	{ACDQE, ynone, Pw, opBytes{0x98}},
   983  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   984  	{ACLC, ynone, Px, opBytes{0xf8}},
   985  	{ACLD, ynone, Px, opBytes{0xfc}},
   986  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   987  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   988  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   989  	{ACLI, ynone, Px, opBytes{0xfa}},
   990  	{ACLTS, ynone, Pm, opBytes{0x06}},
   991  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   992  	{ACMC, ynone, Px, opBytes{0xf5}},
   993  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   994  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   995  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   996  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   997  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   998  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
   999  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1000  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1001  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1002  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1003  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1004  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1005  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1006  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1007  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1008  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1009  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1010  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1011  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1012  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1013  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1014  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1015  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1016  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1017  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1018  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1019  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1020  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1021  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1022  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1023  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1024  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1025  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1026  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1027  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1028  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1029  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1030  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1031  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1032  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1033  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1034  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1035  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1036  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1037  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1038  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1039  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1040  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1041  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1042  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1043  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1044  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1045  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1046  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1047  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1048  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1049  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1050  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1051  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1052  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1054  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1055  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1056  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1057  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1058  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1059  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1060  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1061  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1062  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1063  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1064  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1065  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1066  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1067  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1068  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1069  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1070  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1071  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1072  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1073  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1074  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1075  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1076  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1077  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1078  	{ACWD, ynone, Pe, opBytes{0x99}},
  1079  	{ACWDE, ynone, Px, opBytes{0x98}},
  1080  	{ACQO, ynone, Pw, opBytes{0x99}},
  1081  	{ADAA, ynone, P32, opBytes{0x27}},
  1082  	{ADAS, ynone, P32, opBytes{0x2f}},
  1083  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1084  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1085  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1086  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1087  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1088  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1089  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1090  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1091  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1092  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1093  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1094  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1095  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1096  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1097  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1098  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1099  	{AENTER, nil, 0, opBytes{}}, // botch
  1100  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1101  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1102  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1103  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1104  	{AHLT, ynone, Px, opBytes{0xf4}},
  1105  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1106  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1107  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1108  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1109  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1110  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1114  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1117  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1118  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1119  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1120  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1121  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1122  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1123  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1124  	{AINSL, ynone, Px, opBytes{0x6d}},
  1125  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1126  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1127  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1128  	{AINT, yint, Px, opBytes{0xcd}},
  1129  	{AINTO, ynone, P32, opBytes{0xce}},
  1130  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1131  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1132  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1133  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1134  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1135  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1136  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1138  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1139  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1140  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1141  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1142  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1143  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1144  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1145  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1146  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1147  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1148  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1149  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1150  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1151  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1152  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1153  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1154  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1155  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1156  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1157  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1158  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1159  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1160  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1161  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1162  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1163  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1164  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1165  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1166  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1167  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1168  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1169  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1170  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1171  	{ALODSL, ynone, Px, opBytes{0xad}},
  1172  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1173  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1174  	{ALONG, ybyte, Px, opBytes{4}},
  1175  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1176  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1177  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1178  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1179  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1180  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1181  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1182  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1183  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1184  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1185  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1186  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1187  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1188  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1189  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1190  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1191  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1192  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1193  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1194  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1195  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1196  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1197  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1198  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1199  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1200  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1201  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1202  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1203  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1204  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1205  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1206  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1207  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1208  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1209  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1210  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1211  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1212  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1213  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1214  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1215  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1216  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1217  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1218  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1219  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1220  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1221  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1222  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1223  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1224  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1225  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1226  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1227  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1228  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1229  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1230  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1231  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1232  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1233  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1234  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1235  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1236  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1237  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1238  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1239  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1240  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1241  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1242  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1243  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1244  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1245  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1246  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1247  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1248  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1249  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1250  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1251  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1252  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1253  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1254  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1255  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1256  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1257  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1258  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1259  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1260  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1261  	{AORPD, yxm, Pq, opBytes{0x56}},
  1262  	{AORPS, yxm, Pm, opBytes{0x56}},
  1263  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1264  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1266  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1267  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1268  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1269  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1270  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1271  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1272  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1273  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1274  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1275  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1276  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1277  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1278  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1279  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1280  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1281  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1282  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1283  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1284  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1285  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1286  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1287  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1288  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1289  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1290  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1291  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1292  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1293  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1294  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1295  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1296  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1297  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1298  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1299  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1300  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1301  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1302  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1303  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1304  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1305  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1306  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1307  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1308  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1309  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1310  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1311  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1312  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1313  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1314  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1315  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1316  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1317  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1318  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1319  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1320  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1321  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1322  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1323  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1324  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1325  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1326  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1327  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1328  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1329  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1330  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1331  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1332  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1333  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1334  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1335  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1336  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1337  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1338  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1339  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1340  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1341  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1342  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1343  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1344  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1345  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1346  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1347  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1348  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1349  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1350  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1351  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1352  	{APOPAL, ynone, P32, opBytes{0x61}},
  1353  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1354  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1355  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1356  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1357  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1358  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1359  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1360  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1361  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1363  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1364  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1365  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1366  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1367  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1368  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1369  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1370  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1371  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1372  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1373  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1374  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1375  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1376  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1377  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1378  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1379  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1380  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1381  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1382  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1383  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1384  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1385  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1386  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1387  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1388  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1389  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1390  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1391  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1392  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1393  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1394  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1395  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1396  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1397  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1398  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1399  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1400  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1401  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1402  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1403  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1404  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1405  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1406  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1409  	{AQUAD, ybyte, Px, opBytes{8}},
  1410  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1411  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1412  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1415  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1416  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1417  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1418  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{AREP, ynone, Px, opBytes{0xf3}},
  1421  	{AREPN, ynone, Px, opBytes{0xf2}},
  1422  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1423  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1424  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1425  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1426  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1427  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1428  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1431  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1432  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1435  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1436  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1437  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1438  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1439  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1442  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1443  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1446  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1447  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1450  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1451  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1452  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1453  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1454  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1455  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1456  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1457  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1458  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1459  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1460  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1461  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1462  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1463  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1464  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1465  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1466  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1467  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1468  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1469  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1470  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1471  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1474  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1475  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1478  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1479  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1480  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1481  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1482  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1483  	{ASTC, ynone, Px, opBytes{0xf9}},
  1484  	{ASTD, ynone, Px, opBytes{0xfd}},
  1485  	{ASTI, ynone, Px, opBytes{0xfb}},
  1486  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1487  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1488  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1489  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1490  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1491  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1492  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1493  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1494  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1495  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1496  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1497  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1498  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1500  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1501  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1502  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1503  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1506  	{obj.ATEXT, ytext, Px, opBytes{}},
  1507  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1508  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1509  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1510  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1511  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1512  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1513  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1514  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1515  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1516  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1517  	{AWORD, ybyte, Px, opBytes{2}},
  1518  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1519  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1520  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1523  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1524  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1525  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1526  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1527  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1528  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1530  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1531  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1532  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1533  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1534  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1535  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1536  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1537  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1538  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1539  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1540  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1541  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1542  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1543  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1544  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1545  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1546  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1547  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1548  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1549  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1551  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1552  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1553  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1554  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1555  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1556  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1558  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1559  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1560  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1561  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1562  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1563  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1564  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1565  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1566  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1567  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1568  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1569  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1570  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1571  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1572  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1573  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1574  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1575  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1576  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1577  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1578  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1579  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1580  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1581  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1582  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1583  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1584  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1585  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1586  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1587  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1588  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1589  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1590  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1591  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1592  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1593  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1594  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1595  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1596  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1597  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1598  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1599  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1600  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1601  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1602  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1603  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1604  	{AFFREE, nil, 0, opBytes{}},
  1605  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1606  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1607  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1608  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1609  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1610  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1611  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1612  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1613  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1614  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1615  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1616  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1617  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1618  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1619  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1620  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1621  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1622  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1623  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1624  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1625  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1626  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1627  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1628  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1629  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1630  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1631  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1632  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1633  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1634  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1635  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1636  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1637  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1638  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1639  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1640  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1641  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1642  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1643  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1644  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1645  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1646  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1649  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1650  	{AINVD, ynone, Pm, opBytes{0x08}},
  1651  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1652  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1653  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1654  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1655  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1656  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1657  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1658  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1659  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1660  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1661  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1662  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1663  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1664  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1665  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1666  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1667  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1668  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1669  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1670  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1671  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1672  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1673  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1676  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1677  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1678  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1679  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1680  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1681  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1682  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1683  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1684  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1685  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1686  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1687  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1688  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1689  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1690  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1691  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1692  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1693  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1694  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1695  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1696  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1697  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1698  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1699  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1700  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1701  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1702  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1703  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1704  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1706  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1707  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1708  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1709  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1710  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1711  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1712  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1715  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1717  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1719  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1721  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1723  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1725  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1726  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1727  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1728  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1729  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1732  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1734  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1735  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1737  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1738  	{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1739  	{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1741  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1742  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1743  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1744  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1746  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1747  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1749  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1750  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1751  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1752  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1753  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1754  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1755  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1756  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1757  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1758  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1759  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1760  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1761  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1762  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1763  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1764  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1765  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1766  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1767  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1768  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1769  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1770  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1771  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1772  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1773  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1774  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1775  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1776  
  1777  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1778  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1779  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1780  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1781  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1782  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1783  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1784  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1785  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1786  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1787  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1788  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1789  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1790  
  1791  	{obj.AEND, nil, 0, opBytes{}},
  1792  	{0, nil, 0, opBytes{}},
  1793  }
  1794  
// opindex maps an instruction opcode, masked with obj.AMask, to its table
// entry. instinit fills it in from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1796  
  1797  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1798  // This happens on systems like Solaris that call .so functions instead of system calls.
  1799  // It does not seem to be necessary for any other systems. This is probably working
  1800  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1801  // what that bug is. And this does fix it.
  1802  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1803  	if ctxt.Headtype == objabi.Hsolaris {
  1804  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1805  		return strings.HasPrefix(s.Name, "libc_")
  1806  	}
  1807  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1808  }
  1809  
  1810  // single-instruction no-ops of various lengths.
  1811  // constructed by hand and disassembled with gdb to verify.
  1812  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1813  var nop = [][16]uint8{
  1814  	{0x90},
  1815  	{0x66, 0x90},
  1816  	{0x0F, 0x1F, 0x00},
  1817  	{0x0F, 0x1F, 0x40, 0x00},
  1818  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1819  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1820  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1821  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823  }
  1824  
  1825  // Native Client rejects the repeated 0x66 prefix.
  1826  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1827  func fillnop(p []byte, n int) {
  1828  	var m int
  1829  
  1830  	for n > 0 {
  1831  		m = n
  1832  		if m > len(nop) {
  1833  			m = len(nop)
  1834  		}
  1835  		copy(p[:m], nop[m-1][:m])
  1836  		p = p[m:]
  1837  		n -= m
  1838  	}
  1839  }
  1840  
  1841  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1842  	s.Grow(int64(c) + int64(pad))
  1843  	fillnop(s.P[c:], int(pad))
  1844  	return c + pad
  1845  }
  1846  
  1847  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1848  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1849  		return l
  1850  	}
  1851  	return q
  1852  }
  1853  
  1854  // isJump returns whether p is a jump instruction.
  1855  // It is used to ensure that no standalone or macro-fused jump will straddle
  1856  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1857  func isJump(p *obj.Prog) bool {
  1858  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1859  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1860  }
  1861  
  1862  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1863  // jump. Otherwise, nil is returned.
  1864  func lookForJCC(p *obj.Prog) *obj.Prog {
  1865  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1866  	var q *obj.Prog
  1867  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1868  	}
  1869  
  1870  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1871  		return nil
  1872  	}
  1873  
  1874  	switch q.As {
  1875  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1876  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1877  	default:
  1878  		return nil
  1879  	}
  1880  
  1881  	return q
  1882  }
  1883  
  1884  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1885  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1886  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1887  func fusedJump(p *obj.Prog) (bool, uint8) {
  1888  	var fusedSize uint8
  1889  
  1890  	// The first instruction in a macro fused pair may be preceeded by the LOCK prefix,
  1891  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1892  	// need to be careful to insert any padding before the locks rather than directly after them.
  1893  
  1894  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1895  		fusedSize += p.Isize
  1896  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1897  		}
  1898  		if p == nil {
  1899  			return false, 0
  1900  		}
  1901  	}
  1902  	if p.As == ALOCK {
  1903  		fusedSize += p.Isize
  1904  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1905  		}
  1906  		if p == nil {
  1907  			return false, 0
  1908  		}
  1909  	}
  1910  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1911  
  1912  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1913  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1914  
  1915  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1916  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1917  
  1918  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1919  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1920  
  1921  	if !cmpAddSub && !testAnd && !incDec {
  1922  		return false, 0
  1923  	}
  1924  
  1925  	if !incDec {
  1926  		var argOne obj.AddrType
  1927  		var argTwo obj.AddrType
  1928  		if cmp {
  1929  			argOne = p.From.Type
  1930  			argTwo = p.To.Type
  1931  		} else {
  1932  			argOne = p.To.Type
  1933  			argTwo = p.From.Type
  1934  		}
  1935  		if argOne == obj.TYPE_REG {
  1936  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1937  				return false, 0
  1938  			}
  1939  		} else if argOne == obj.TYPE_MEM {
  1940  			if argTwo != obj.TYPE_REG {
  1941  				return false, 0
  1942  			}
  1943  		} else {
  1944  			return false, 0
  1945  		}
  1946  	}
  1947  
  1948  	fusedSize += p.Isize
  1949  	jmp := lookForJCC(p)
  1950  	if jmp == nil {
  1951  		return false, 0
  1952  	}
  1953  
  1954  	fusedSize += jmp.Isize
  1955  
  1956  	if testAnd {
  1957  		return true, fusedSize
  1958  	}
  1959  
  1960  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1961  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1962  		return false, 0
  1963  	}
  1964  
  1965  	if cmpAddSub {
  1966  		return true, fusedSize
  1967  	}
  1968  
  1969  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1970  		return false, 0
  1971  	}
  1972  
  1973  	return true, fusedSize
  1974  }
  1975  
  1976  type padJumpsCtx int32
  1977  
  1978  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1979  	// Disable jump padding on 32 bit builds by settting
  1980  	// padJumps to 0.
  1981  	if ctxt.Arch.Family == sys.I386 {
  1982  		return padJumpsCtx(0)
  1983  	}
  1984  
  1985  	// Disable jump padding for hand written assembly code.
  1986  	if ctxt.IsAsm {
  1987  		return padJumpsCtx(0)
  1988  	}
  1989  
  1990  	return padJumpsCtx(32)
  1991  }
  1992  
  1993  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1994  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1995  // not cross or end on a 32 byte boundary.
  1996  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1997  	if pjc == 0 {
  1998  		return c
  1999  	}
  2000  
  2001  	var toPad int32
  2002  	fj, fjSize := fusedJump(p)
  2003  	mask := int32(pjc - 1)
  2004  	if fj {
  2005  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2006  			toPad = int32(pjc) - (c & mask)
  2007  		}
  2008  	} else if isJump(p) {
  2009  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2010  			toPad = int32(pjc) - (c & mask)
  2011  		}
  2012  	}
  2013  	if toPad <= 0 {
  2014  		return c
  2015  	}
  2016  
  2017  	return noppad(ctxt, s, c, toPad)
  2018  }
  2019  
  2020  // reAssemble is called if an instruction's size changes during assembly. If
  2021  // it does and the instruction is a standalone or a macro-fused jump we need to
  2022  // reassemble.
  2023  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2024  	if pjc == 0 {
  2025  		return false
  2026  	}
  2027  
  2028  	fj, _ := fusedJump(p)
  2029  	return fj || isJump(p)
  2030  }
  2031  
// nopPad describes a run of padding bytes that span6 emitted during assembly
// and afterwards splices into the instruction list as an explicit NOP Prog.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2036  
// span6 assembles the instructions of function symbol s into s.P, assigning
// each Prog its Pc and setting s.Size. It returns immediately if s.P is
// already non-nil.
//
// The work is done in stages:
//  1. a rewrite pass that resolves self-targeting branches, lowers ADJSP and
//     redirects indirect calls/jumps through retpoline thunks when enabled;
//  2. a pass that marks every branch as short and detects backward branches
//     and loop heads;
//  3. the assembly loop proper, which is repeated for as long as a pass
//     discovers a reason to re-assemble (a short branch that does not reach,
//     or a size change affecting jump padding);
//  4. splicing of the emitted padding into the Prog list as NOPs, and marking
//     of the non-preemptible TLS access sequence.
//
// newprog is only used when marking unsafe points.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Stage 1: per-instruction rewrites that must happen before sizing.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a resolved target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL/JMP through a register is
		// turned into a direct branch to runtime.retpoline<reg>. Indirect
		// branches through memory cannot be converted and are diagnosed.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	// Stage 2: count instructions and initialize the branch flags.
	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already has branchShort set was visited earlier in
		// this loop (or is p itself), so the branch goes backwards and the
		// target is treated as a loop head.
		// NOTE(review): this relies on Back not carrying branchShort before
		// this loop runs — confirm.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int // number of assembly passes made so far
	var c int32 // current output offset within s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	// Stage 3: assemble, repeating until a pass completes without requesting
	// another one.
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the relocations added by the previous pass, zeroing them
		// first so the truncated tail holds no stale entries.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			// c0 remembers the offset before any padding so that the amount
			// of padding emitted in front of p can be computed below.
			c0 := c
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				// v is the distance from c up to the next multiple of loopAlign.
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Too far for a short branch: clear the short flag
						// and request another pass.
						reAssemble = true
						q.Back ^= branchShort
					}

					// Patch the 8-bit displacement; it is stored one byte
					// later for JCXZL and XBEGIN than for other branches.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit little-endian displacement
					// occupies the last four bytes of q.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Give up if the passes do not converge.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not keep re-assembling once this function has reported errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	// Each NOP is linked in directly after the instruction that preceded the
	// padding and starts where that instruction ends.
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}
}
  2228  
// instinit initializes the package-level lookup tables used by the x86
// assembler: opindex (opcode to table entry), ycover (the operand class
// relation, a flattened Ymax-by-Ymax matrix) and reg/regrex (per-register
// encoding data). It is a no-op if the tables are already initialized.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both instruction tables by opcode. An opcode that is already
	// present in opindex is reported as a phase error.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to its zero-opcode terminator.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// Every operand class is related to itself. This also sets ycover[0],
	// which serves as the "already initialized" marker tested above.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// The entries below have the form ycover[a*Ymax+b] = 1.
	// NOTE(review): presumably this means an operand of class a is accepted
	// where class b is required — confirm against the code that reads ycover.

	// Immediate classes.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// Register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Vector and mask classes.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Fill in the per-register tables. reg[i] starts at -1 and is replaced,
	// for each register range handled below, by the register's index within
	// its range reduced to three bits. regrex[i] receives extra prefix bits
	// for registers in the upper part of a range.
	// NOTE(review): regrex values look like REX/EVEX extension bits — confirm
	// against the encoder that consumes them.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH are numbered 4 and up.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		// X, Y and Z registers follow one pattern: registers 8-15 get
		// Rxr|Rxx|Rxb, registers 16-23 get RxrEvex, and 24-31 get both.
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// Only regrex is set for CR8..CR15; reg is left as assigned above.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2462  
// isAndroid records whether objabi.GOOS is "android". It is consulted by
// prefixof and vaddr when deciding how TLS references are encoded.
var isAndroid = objabi.GOOS == "android"
  2464  
  2465  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2466  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2467  		return 0
  2468  	}
  2469  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2470  		switch a.Reg {
  2471  		case REG_CS:
  2472  			return 0x2e
  2473  
  2474  		case REG_DS:
  2475  			return 0x3e
  2476  
  2477  		case REG_ES:
  2478  			return 0x26
  2479  
  2480  		case REG_FS:
  2481  			return 0x64
  2482  
  2483  		case REG_GS:
  2484  			return 0x65
  2485  
  2486  		case REG_TLS:
  2487  			// NOTE: Systems listed here should be only systems that
  2488  			// support direct TLS references like 8(TLS) implemented as
  2489  			// direct references from FS or GS. Systems that require
  2490  			// the initial-exec model, where you load the TLS base into
  2491  			// a register and then index from that register, do not reach
  2492  			// this code and should not be listed.
  2493  			if ctxt.Arch.Family == sys.I386 {
  2494  				switch ctxt.Headtype {
  2495  				default:
  2496  					if isAndroid {
  2497  						return 0x65 // GS
  2498  					}
  2499  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2500  
  2501  				case objabi.Hdarwin,
  2502  					objabi.Hdragonfly,
  2503  					objabi.Hfreebsd,
  2504  					objabi.Hnetbsd,
  2505  					objabi.Hopenbsd:
  2506  					return 0x65 // GS
  2507  				}
  2508  			}
  2509  
  2510  			switch ctxt.Headtype {
  2511  			default:
  2512  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2513  
  2514  			case objabi.Hlinux:
  2515  				if isAndroid {
  2516  					return 0x64 // FS
  2517  				}
  2518  
  2519  				if ctxt.Flag_shared {
  2520  					log.Fatalf("unknown TLS base register for linux with -shared")
  2521  				} else {
  2522  					return 0x64 // FS
  2523  				}
  2524  
  2525  			case objabi.Hdragonfly,
  2526  				objabi.Hfreebsd,
  2527  				objabi.Hnetbsd,
  2528  				objabi.Hopenbsd,
  2529  				objabi.Hsolaris:
  2530  				return 0x64 // FS
  2531  
  2532  			case objabi.Hdarwin:
  2533  				return 0x65 // GS
  2534  			}
  2535  		}
  2536  	}
  2537  
  2538  	if ctxt.Arch.Family == sys.I386 {
  2539  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2540  			// When building for inclusion into a shared library, an instruction of the form
  2541  			//     MOVL off(CX)(TLS*1), AX
  2542  			// becomes
  2543  			//     mov %gs:off(%ecx), %eax
  2544  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2545  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2546  			// a shared library the instruction it becomes
  2547  			//     mov 0x0(%ecx), %eax
  2548  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2549  			return 0x65 // GS
  2550  		}
  2551  		return 0
  2552  	}
  2553  
  2554  	switch a.Index {
  2555  	case REG_CS:
  2556  		return 0x2e
  2557  
  2558  	case REG_DS:
  2559  		return 0x3e
  2560  
  2561  	case REG_ES:
  2562  		return 0x26
  2563  
  2564  	case REG_TLS:
  2565  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2566  			// When building for inclusion into a shared library, an instruction of the form
  2567  			//     MOV off(CX)(TLS*1), AX
  2568  			// becomes
  2569  			//     mov %fs:off(%rcx), %rax
  2570  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2571  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2572  			// a shared library the instruction does not require a prefix.
  2573  			return 0x64
  2574  		}
  2575  
  2576  	case REG_FS:
  2577  		return 0x64
  2578  
  2579  	case REG_GS:
  2580  		return 0x65
  2581  	}
  2582  
  2583  	return 0
  2584  }
  2585  
  2586  // oclassRegList returns multisource operand class for addr.
  2587  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2588  	// TODO(quasilyte): when oclass register case is refactored into
  2589  	// lookup table, use it here to get register kind more easily.
  2590  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2591  
  2592  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2593  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2594  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2595  
  2596  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2597  	low := regIndex(int16(reg0))
  2598  	high := regIndex(int16(reg1))
  2599  
  2600  	if ctxt.Arch.Family == sys.I386 {
  2601  		if low >= 8 || high >= 8 {
  2602  			return Yxxx
  2603  		}
  2604  	}
  2605  
  2606  	switch high - low {
  2607  	case 3:
  2608  		switch {
  2609  		case regIsXmm(reg0) && regIsXmm(reg1):
  2610  			return YxrEvexMulti4
  2611  		case regIsYmm(reg0) && regIsYmm(reg1):
  2612  			return YyrEvexMulti4
  2613  		case regIsZmm(reg0) && regIsZmm(reg1):
  2614  			return YzrMulti4
  2615  		default:
  2616  			return Yxxx
  2617  		}
  2618  	default:
  2619  		return Yxxx
  2620  	}
  2621  }
  2622  
  2623  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2624  // For addr that is not V-mem returns (Yxxx, false).
  2625  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2626  	switch addr.Index {
  2627  	case REG_X0 + 0,
  2628  		REG_X0 + 1,
  2629  		REG_X0 + 2,
  2630  		REG_X0 + 3,
  2631  		REG_X0 + 4,
  2632  		REG_X0 + 5,
  2633  		REG_X0 + 6,
  2634  		REG_X0 + 7:
  2635  		return Yxvm, true
  2636  	case REG_X8 + 0,
  2637  		REG_X8 + 1,
  2638  		REG_X8 + 2,
  2639  		REG_X8 + 3,
  2640  		REG_X8 + 4,
  2641  		REG_X8 + 5,
  2642  		REG_X8 + 6,
  2643  		REG_X8 + 7:
  2644  		if ctxt.Arch.Family == sys.I386 {
  2645  			return Yxxx, true
  2646  		}
  2647  		return Yxvm, true
  2648  	case REG_X16 + 0,
  2649  		REG_X16 + 1,
  2650  		REG_X16 + 2,
  2651  		REG_X16 + 3,
  2652  		REG_X16 + 4,
  2653  		REG_X16 + 5,
  2654  		REG_X16 + 6,
  2655  		REG_X16 + 7,
  2656  		REG_X16 + 8,
  2657  		REG_X16 + 9,
  2658  		REG_X16 + 10,
  2659  		REG_X16 + 11,
  2660  		REG_X16 + 12,
  2661  		REG_X16 + 13,
  2662  		REG_X16 + 14,
  2663  		REG_X16 + 15:
  2664  		if ctxt.Arch.Family == sys.I386 {
  2665  			return Yxxx, true
  2666  		}
  2667  		return YxvmEvex, true
  2668  
  2669  	case REG_Y0 + 0,
  2670  		REG_Y0 + 1,
  2671  		REG_Y0 + 2,
  2672  		REG_Y0 + 3,
  2673  		REG_Y0 + 4,
  2674  		REG_Y0 + 5,
  2675  		REG_Y0 + 6,
  2676  		REG_Y0 + 7:
  2677  		return Yyvm, true
  2678  	case REG_Y8 + 0,
  2679  		REG_Y8 + 1,
  2680  		REG_Y8 + 2,
  2681  		REG_Y8 + 3,
  2682  		REG_Y8 + 4,
  2683  		REG_Y8 + 5,
  2684  		REG_Y8 + 6,
  2685  		REG_Y8 + 7:
  2686  		if ctxt.Arch.Family == sys.I386 {
  2687  			return Yxxx, true
  2688  		}
  2689  		return Yyvm, true
  2690  	case REG_Y16 + 0,
  2691  		REG_Y16 + 1,
  2692  		REG_Y16 + 2,
  2693  		REG_Y16 + 3,
  2694  		REG_Y16 + 4,
  2695  		REG_Y16 + 5,
  2696  		REG_Y16 + 6,
  2697  		REG_Y16 + 7,
  2698  		REG_Y16 + 8,
  2699  		REG_Y16 + 9,
  2700  		REG_Y16 + 10,
  2701  		REG_Y16 + 11,
  2702  		REG_Y16 + 12,
  2703  		REG_Y16 + 13,
  2704  		REG_Y16 + 14,
  2705  		REG_Y16 + 15:
  2706  		if ctxt.Arch.Family == sys.I386 {
  2707  			return Yxxx, true
  2708  		}
  2709  		return YyvmEvex, true
  2710  
  2711  	case REG_Z0 + 0,
  2712  		REG_Z0 + 1,
  2713  		REG_Z0 + 2,
  2714  		REG_Z0 + 3,
  2715  		REG_Z0 + 4,
  2716  		REG_Z0 + 5,
  2717  		REG_Z0 + 6,
  2718  		REG_Z0 + 7:
  2719  		return Yzvm, true
  2720  	case REG_Z8 + 0,
  2721  		REG_Z8 + 1,
  2722  		REG_Z8 + 2,
  2723  		REG_Z8 + 3,
  2724  		REG_Z8 + 4,
  2725  		REG_Z8 + 5,
  2726  		REG_Z8 + 6,
  2727  		REG_Z8 + 7,
  2728  		REG_Z8 + 8,
  2729  		REG_Z8 + 9,
  2730  		REG_Z8 + 10,
  2731  		REG_Z8 + 11,
  2732  		REG_Z8 + 12,
  2733  		REG_Z8 + 13,
  2734  		REG_Z8 + 14,
  2735  		REG_Z8 + 15,
  2736  		REG_Z8 + 16,
  2737  		REG_Z8 + 17,
  2738  		REG_Z8 + 18,
  2739  		REG_Z8 + 19,
  2740  		REG_Z8 + 20,
  2741  		REG_Z8 + 21,
  2742  		REG_Z8 + 22,
  2743  		REG_Z8 + 23:
  2744  		if ctxt.Arch.Family == sys.I386 {
  2745  			return Yxxx, true
  2746  		}
  2747  		return Yzvm, true
  2748  	}
  2749  
  2750  	return Yxxx, false
  2751  }
  2752  
// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p. Yxxx is returned for operands that are invalid or not
// recognised; some malformed operands are additionally reported through
// ctxt.Diag.
//
// p is used only to format a in diagnostics.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a named operand with no base, index or scale is accepted.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		// A vector register used as index makes this a VSIB operand.
		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		// The name-based validation below is applied on AMD64 only.
		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// Any remaining address is classified by its offset, exactly
		// like a constant.
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		// On 386 only the low 32 bits of the offset are considered,
		// interpreted as a signed value.
		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			v = int64(int32(v))
		}
		// Cases are tried top to bottom, so the first (narrowest)
		// matching class wins.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every operand type other than TYPE_REG has been handled (or has
	// returned) above.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		// These byte registers are rejected on 386; elsewhere they are
		// classified like the other byte registers below.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected on 386; elsewhere they share the class of
		// SP/BP/SI/DI below.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		// Z8 and above are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Register not listed above.
	return Yxxx
}
  3170  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Bytes are appended at buf[off]. The fixed-width Put* methods index buf
// directly, so writing past its fixed capacity panics; Put uses copy, which
// drops bytes that do not fit while still advancing off.
type AsmBuf struct {
	// buf is the fixed-size backing storage for the assembled bytes.
	buf      [100]byte
	// off is the number of bytes written so far, i.e. the next write position.
	off      int
	// rexflag accumulates REX prefix bits; asmandsz ORs register-derived
	// bits into it.
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	// rep, repn and lock are presumably set when the corresponding
	// instruction prefix is requested; they are not written in this part of
	// the file — TODO confirm against the callers.
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}
  3185  
  3186  // Put1 appends one byte to the end of the buffer.
  3187  func (ab *AsmBuf) Put1(x byte) {
  3188  	ab.buf[ab.off] = x
  3189  	ab.off++
  3190  }
  3191  
  3192  // Put2 appends two bytes to the end of the buffer.
  3193  func (ab *AsmBuf) Put2(x, y byte) {
  3194  	ab.buf[ab.off+0] = x
  3195  	ab.buf[ab.off+1] = y
  3196  	ab.off += 2
  3197  }
  3198  
  3199  // Put3 appends three bytes to the end of the buffer.
  3200  func (ab *AsmBuf) Put3(x, y, z byte) {
  3201  	ab.buf[ab.off+0] = x
  3202  	ab.buf[ab.off+1] = y
  3203  	ab.buf[ab.off+2] = z
  3204  	ab.off += 3
  3205  }
  3206  
  3207  // Put4 appends four bytes to the end of the buffer.
  3208  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3209  	ab.buf[ab.off+0] = x
  3210  	ab.buf[ab.off+1] = y
  3211  	ab.buf[ab.off+2] = z
  3212  	ab.buf[ab.off+3] = w
  3213  	ab.off += 4
  3214  }
  3215  
  3216  // PutInt16 writes v into the buffer using little-endian encoding.
  3217  func (ab *AsmBuf) PutInt16(v int16) {
  3218  	ab.buf[ab.off+0] = byte(v)
  3219  	ab.buf[ab.off+1] = byte(v >> 8)
  3220  	ab.off += 2
  3221  }
  3222  
  3223  // PutInt32 writes v into the buffer using little-endian encoding.
  3224  func (ab *AsmBuf) PutInt32(v int32) {
  3225  	ab.buf[ab.off+0] = byte(v)
  3226  	ab.buf[ab.off+1] = byte(v >> 8)
  3227  	ab.buf[ab.off+2] = byte(v >> 16)
  3228  	ab.buf[ab.off+3] = byte(v >> 24)
  3229  	ab.off += 4
  3230  }
  3231  
  3232  // PutInt64 writes v into the buffer using little-endian encoding.
  3233  func (ab *AsmBuf) PutInt64(v int64) {
  3234  	ab.buf[ab.off+0] = byte(v)
  3235  	ab.buf[ab.off+1] = byte(v >> 8)
  3236  	ab.buf[ab.off+2] = byte(v >> 16)
  3237  	ab.buf[ab.off+3] = byte(v >> 24)
  3238  	ab.buf[ab.off+4] = byte(v >> 32)
  3239  	ab.buf[ab.off+5] = byte(v >> 40)
  3240  	ab.buf[ab.off+6] = byte(v >> 48)
  3241  	ab.buf[ab.off+7] = byte(v >> 56)
  3242  	ab.off += 8
  3243  }
  3244  
  3245  // Put copies b into the buffer.
  3246  func (ab *AsmBuf) Put(b []byte) {
  3247  	copy(ab.buf[ab.off:], b)
  3248  	ab.off += len(b)
  3249  }
  3250  
  3251  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3252  // starting at specified offset (e.g. z counter value).
  3253  // Trailing 0 is not written.
  3254  //
  3255  // Intended to be used for literal Z cases.
  3256  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3257  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3258  	for int(op[offset]) != 0 {
  3259  		ab.Put1(byte(op[offset]))
  3260  		offset++
  3261  	}
  3262  }
  3263  
  3264  // Insert inserts b at offset i.
  3265  func (ab *AsmBuf) Insert(i int, b byte) {
  3266  	ab.off++
  3267  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3268  	ab.buf[i] = b
  3269  }
  3270  
  3271  // Last returns the byte at the end of the buffer.
  3272  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3273  
  3274  // Len returns the length of the buffer.
  3275  func (ab *AsmBuf) Len() int { return ab.off }
  3276  
  3277  // Bytes returns the contents of the buffer.
  3278  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3279  
  3280  // Reset empties the buffer.
  3281  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3282  
  3283  // At returns the byte at offset i.
  3284  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3285  
// asmidx emits SIB byte.
//
// The byte is assembled in i from three parts: reg[index] shifted left by 3,
// the scale (1, 2, 4 or 8, stored as 0-3 shifted left by 6) and reg[base]
// in the low bits. If any of the three arguments is not acceptable, a
// diagnostic is reported through ctxt.Diag and a zero byte is emitted
// instead, so exactly one byte is always appended.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: the index field is set to 4 and the scale
		// is not examined at all (the jump skips the scale switch).
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// These index registers are rejected on 386; elsewhere they
		// are encoded like the registers in the next case.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		// Note that REG_SP is absent from both lists, so it is rejected
		// as an index register.
		i = reg[index] << 3
	}

	// Only reached when an index register is present.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected as base registers on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit a placeholder byte.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
  3474  
  3475  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3476  	var rel obj.Reloc
  3477  
  3478  	v := vaddr(ctxt, p, a, &rel)
  3479  	if rel.Siz != 0 {
  3480  		if rel.Siz != 4 {
  3481  			ctxt.Diag("bad reloc")
  3482  		}
  3483  		r := obj.Addrel(cursym)
  3484  		*r = rel
  3485  		r.Off = int32(p.Pc + int64(ab.Len()))
  3486  	}
  3487  
  3488  	ab.PutInt32(int32(v))
  3489  }
  3490  
  3491  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3492  	if r != nil {
  3493  		*r = obj.Reloc{}
  3494  	}
  3495  
  3496  	switch a.Name {
  3497  	case obj.NAME_STATIC,
  3498  		obj.NAME_GOTREF,
  3499  		obj.NAME_EXTERN:
  3500  		s := a.Sym
  3501  		if r == nil {
  3502  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3503  			log.Fatalf("reloc")
  3504  		}
  3505  
  3506  		if a.Name == obj.NAME_GOTREF {
  3507  			r.Siz = 4
  3508  			r.Type = objabi.R_GOTPCREL
  3509  		} else if useAbs(ctxt, s) {
  3510  			r.Siz = 4
  3511  			r.Type = objabi.R_ADDR
  3512  		} else {
  3513  			r.Siz = 4
  3514  			r.Type = objabi.R_PCREL
  3515  		}
  3516  
  3517  		r.Off = -1 // caller must fill in
  3518  		r.Sym = s
  3519  		r.Add = a.Offset
  3520  
  3521  		return 0
  3522  	}
  3523  
  3524  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3525  		if r == nil {
  3526  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3527  			log.Fatalf("reloc")
  3528  		}
  3529  
  3530  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3531  			r.Type = objabi.R_TLS_LE
  3532  			r.Siz = 4
  3533  			r.Off = -1 // caller must fill in
  3534  			r.Add = a.Offset
  3535  		}
  3536  		return 0
  3537  	}
  3538  
  3539  	return a.Offset
  3540  }
  3541  
  3542  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3543  	var base int
  3544  	var rel obj.Reloc
  3545  
  3546  	rex &= 0x40 | Rxr
  3547  	if a.Offset != int64(int32(a.Offset)) {
  3548  		// The rules are slightly different for 386 and AMD64,
  3549  		// mostly for historical reasons. We may unify them later,
  3550  		// but it must be discussed beforehand.
  3551  		//
  3552  		// For 64bit mode only LEAL is allowed to overflow.
  3553  		// It's how https://golang.org/cl/59630 made it.
  3554  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3555  		//
  3556  		// For 32bit mode rules are more permissive.
  3557  		// If offset fits uint32, it's permitted.
  3558  		// This is allowed for assembly that wants to use 32-bit hex
  3559  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3560  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3561  			(ctxt.Arch.Family != sys.AMD64 &&
  3562  				int64(uint32(a.Offset)) == a.Offset &&
  3563  				ab.rexflag&Rxw == 0)
  3564  		if !overflowOK {
  3565  			ctxt.Diag("offset too large in %s", p)
  3566  		}
  3567  	}
  3568  	v := int32(a.Offset)
  3569  	rel.Siz = 0
  3570  
  3571  	switch a.Type {
  3572  	case obj.TYPE_ADDR:
  3573  		if a.Name == obj.NAME_NONE {
  3574  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3575  		}
  3576  		if a.Index == REG_TLS {
  3577  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3578  		}
  3579  		goto bad
  3580  
  3581  	case obj.TYPE_REG:
  3582  		const regFirst = REG_AL
  3583  		const regLast = REG_Z31
  3584  		if a.Reg < regFirst || regLast < a.Reg {
  3585  			goto bad
  3586  		}
  3587  		if v != 0 {
  3588  			goto bad
  3589  		}
  3590  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3591  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3592  		return
  3593  	}
  3594  
  3595  	if a.Type != obj.TYPE_MEM {
  3596  		goto bad
  3597  	}
  3598  
  3599  	if a.Index != REG_NONE && a.Index != REG_TLS {
  3600  		base := int(a.Reg)
  3601  		switch a.Name {
  3602  		case obj.NAME_EXTERN,
  3603  			obj.NAME_GOTREF,
  3604  			obj.NAME_STATIC:
  3605  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3606  				goto bad
  3607  			}
  3608  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3609  				// The base register has already been set. It holds the PC
  3610  				// of this instruction returned by a PC-reading thunk.
  3611  				// See obj6.go:rewriteToPcrel.
  3612  			} else {
  3613  				base = REG_NONE
  3614  			}
  3615  			v = int32(vaddr(ctxt, p, a, &rel))
  3616  
  3617  		case obj.NAME_AUTO,
  3618  			obj.NAME_PARAM:
  3619  			base = REG_SP
  3620  		}
  3621  
  3622  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3623  		if base == REG_NONE {
  3624  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3625  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3626  			goto putrelv
  3627  		}
  3628  
  3629  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3630  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3631  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3632  			return
  3633  		}
  3634  
  3635  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3636  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3637  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3638  			ab.Put1(disp8)
  3639  			return
  3640  		}
  3641  
  3642  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3643  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3644  		goto putrelv
  3645  	}
  3646  
  3647  	base = int(a.Reg)
  3648  	switch a.Name {
  3649  	case obj.NAME_STATIC,
  3650  		obj.NAME_GOTREF,
  3651  		obj.NAME_EXTERN:
  3652  		if a.Sym == nil {
  3653  			ctxt.Diag("bad addr: %v", p)
  3654  		}
  3655  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3656  			// The base register has already been set. It holds the PC
  3657  			// of this instruction returned by a PC-reading thunk.
  3658  			// See obj6.go:rewriteToPcrel.
  3659  		} else {
  3660  			base = REG_NONE
  3661  		}
  3662  		v = int32(vaddr(ctxt, p, a, &rel))
  3663  
  3664  	case obj.NAME_AUTO,
  3665  		obj.NAME_PARAM:
  3666  		base = REG_SP
  3667  	}
  3668  
  3669  	if base == REG_TLS {
  3670  		v = int32(vaddr(ctxt, p, a, &rel))
  3671  	}
  3672  
  3673  	ab.rexflag |= regrex[base]&Rxb | rex
  3674  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3675  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3676  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3677  				ctxt.Diag("%v has offset against gotref", p)
  3678  			}
  3679  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3680  			goto putrelv
  3681  		}
  3682  
  3683  		// temporary
  3684  		ab.Put2(
  3685  			byte(0<<6|4<<0|r<<3), // sib present
  3686  			0<<6|4<<3|5<<0,       // DS:d32
  3687  		)
  3688  		goto putrelv
  3689  	}
  3690  
  3691  	if base == REG_SP || base == REG_R12 {
  3692  		if v == 0 {
  3693  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3694  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3695  			return
  3696  		}
  3697  
  3698  		if disp8, ok := toDisp8(v, p, ab); ok {
  3699  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3700  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3701  			ab.Put1(disp8)
  3702  			return
  3703  		}
  3704  
  3705  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3706  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3707  		goto putrelv
  3708  	}
  3709  
  3710  	if REG_AX <= base && base <= REG_R15 {
  3711  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
  3712  			rel = obj.Reloc{}
  3713  			rel.Type = objabi.R_TLS_LE
  3714  			rel.Siz = 4
  3715  			rel.Sym = nil
  3716  			rel.Add = int64(v)
  3717  			v = 0
  3718  		}
  3719  
  3720  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3721  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3722  			return
  3723  		}
  3724  
  3725  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3726  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3727  			return
  3728  		}
  3729  
  3730  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3731  		goto putrelv
  3732  	}
  3733  
  3734  	goto bad
  3735  
  3736  putrelv:
  3737  	if rel.Siz != 0 {
  3738  		if rel.Siz != 4 {
  3739  			ctxt.Diag("bad rel")
  3740  			goto bad
  3741  		}
  3742  
  3743  		r := obj.Addrel(cursym)
  3744  		*r = rel
  3745  		r.Off = int32(p.Pc + int64(ab.Len()))
  3746  	}
  3747  
  3748  	ab.PutInt32(v)
  3749  	return
  3750  
  3751  bad:
  3752  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3753  }
  3754  
  3755  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3756  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3757  }
  3758  
  3759  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3760  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3761  }
  3762  
  3763  func bytereg(a *obj.Addr, t *uint8) {
  3764  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3765  		a.Reg += REG_AL - REG_AX
  3766  		*t = 0
  3767  	}
  3768  }
  3769  
  3770  func unbytereg(a *obj.Addr, t *uint8) {
  3771  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3772  		a.Reg += REG_AX - REG_AL
  3773  		*t = 0
  3774  	}
  3775  }
  3776  
// Encoding kinds for ymovtab rows. Each row of ymovtab carries one of these
// values to select how its opcode bytes and operands are emitted.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem           // used by the "mov seg" rows that store a segment register
	movMemReg           // used by the "mov seg" rows that load a segment register
	movRegMem2op        // like movRegMem, for rows whose opcode starts with 0x0f
	movMemReg2op        // like movMemReg, for rows whose opcode starts with 0x0f
	movFullPtr          // Load full pointer, trash heap (unsupported)
	movDoubleShift      // used by the three-operand SHLx/SHRx rows
	movTLSReg           // used by the "load TLS base" rows
)
  3787  
// ymovtab lists special-case encodings keyed by instruction and operand
// classes. Each row holds, in order: the instruction, three operand classes
// (Ynone where an operand is absent), one of the mov* encoding kinds, and up
// to four opcode/data bytes. The table ends with an all-zero row.
// NOTE(review): the movtab field names and the code that consumes this table
// are outside this chunk; confirm the exact meaning of each byte there.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero row marks the end of the table.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3931  
  3932  func isax(a *obj.Addr) bool {
  3933  	switch a.Reg {
  3934  	case REG_AX, REG_AL, REG_AH:
  3935  		return true
  3936  	}
  3937  
  3938  	if a.Index == REG_AX {
  3939  		return true
  3940  	}
  3941  	return false
  3942  }
  3943  
  3944  func subreg(p *obj.Prog, from int, to int) {
  3945  	if false { /* debug['Q'] */
  3946  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3947  	}
  3948  
  3949  	if int(p.From.Reg) == from {
  3950  		p.From.Reg = int16(to)
  3951  		p.Ft = 0
  3952  	}
  3953  
  3954  	if int(p.To.Reg) == from {
  3955  		p.To.Reg = int16(to)
  3956  		p.Tt = 0
  3957  	}
  3958  
  3959  	if int(p.From.Index) == from {
  3960  		p.From.Index = int16(to)
  3961  		p.Ft = 0
  3962  	}
  3963  
  3964  	if int(p.To.Index) == from {
  3965  		p.To.Index = int16(to)
  3966  		p.Tt = 0
  3967  	}
  3968  
  3969  	if false { /* debug['Q'] */
  3970  		fmt.Printf("%v\n", p)
  3971  	}
  3972  }
  3973  
  3974  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3975  	switch op {
  3976  	case Pm, Pe, Pf2, Pf3:
  3977  		if osize != 1 {
  3978  			if op != Pm {
  3979  				ab.Put1(byte(op))
  3980  			}
  3981  			ab.Put1(Pm)
  3982  			z++
  3983  			op = int(o.op[z])
  3984  			break
  3985  		}
  3986  		fallthrough
  3987  
  3988  	default:
  3989  		if ab.Len() == 0 || ab.Last() != Pm {
  3990  			ab.Put1(Pm)
  3991  		}
  3992  	}
  3993  
  3994  	ab.Put1(byte(op))
  3995  	return z
  3996  }
  3997  
// bpduff1 holds the raw machine code for the two instructions named in the
// inline comments: store BP at -16(SP), then point BP at that slot.
// NOTE(review): the users of this sequence are outside this chunk; it is
// presumably emitted before a Duff's-device call — confirm.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  4002  
// bpduff2 holds the raw machine code for the instruction named in the
// inline comment: reload BP from the address it currently points to.
// NOTE(review): the users of this sequence are outside this chunk; it
// presumably undoes bpduff1 after a Duff's-device call — confirm.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  4006  
  4007  // asmevex emits EVEX pregis and opcode byte.
  4008  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  4009  // K-masking register.
  4010  //
  4011  // Expects asmbuf.evex to be properly initialized.
  4012  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  4013  	ab.evexflag = true
  4014  	evex := ab.evex
  4015  
  4016  	rexR := byte(1)
  4017  	evexR := byte(1)
  4018  	rexX := byte(1)
  4019  	rexB := byte(1)
  4020  	if r != nil {
  4021  		if regrex[r.Reg]&Rxr != 0 {
  4022  			rexR = 0 // "ModR/M.reg" selector 4th bit.
  4023  		}
  4024  		if regrex[r.Reg]&RxrEvex != 0 {
  4025  			evexR = 0 // "ModR/M.reg" selector 5th bit.
  4026  		}
  4027  	}
  4028  	if rm != nil {
  4029  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  4030  			rexX = 0
  4031  		} else if regrex[rm.Index]&Rxx != 0 {
  4032  			rexX = 0
  4033  		}
  4034  		if regrex[rm.Reg]&Rxb != 0 {
  4035  			rexB = 0
  4036  		}
  4037  	}
  4038  	// P0 = [R][X][B][R'][00][mm]
  4039  	p0 := (rexR << 7) |
  4040  		(rexX << 6) |
  4041  		(rexB << 5) |
  4042  		(evexR << 4) |
  4043  		(0 << 2) |
  4044  		(evex.M() << 0)
  4045  
  4046  	vexV := byte(0)
  4047  	if v != nil {
  4048  		// 4bit-wide reg index.
  4049  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4050  	}
  4051  	vexV ^= 0x0F
  4052  	// P1 = [W][vvvv][1][pp]
  4053  	p1 := (evex.W() << 7) |
  4054  		(vexV << 3) |
  4055  		(1 << 2) |
  4056  		(evex.P() << 0)
  4057  
  4058  	suffix := evexSuffixMap[p.Scond]
  4059  	evexZ := byte(0)
  4060  	evexLL := evex.L()
  4061  	evexB := byte(0)
  4062  	evexV := byte(1)
  4063  	evexA := byte(0)
  4064  	if suffix.zeroing {
  4065  		if !evex.ZeroingEnabled() {
  4066  			ctxt.Diag("unsupported zeroing: %v", p)
  4067  		}
  4068  		evexZ = 1
  4069  	}
  4070  	switch {
  4071  	case suffix.rounding != rcUnset:
  4072  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4073  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  4074  		} else if !evex.RoundingEnabled() {
  4075  			ctxt.Diag("unsupported rounding: %v", p)
  4076  		}
  4077  		evexB = 1
  4078  		evexLL = suffix.rounding
  4079  	case suffix.broadcast:
  4080  		if rm == nil || rm.Type != obj.TYPE_MEM {
  4081  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  4082  		} else if !evex.BroadcastEnabled() {
  4083  			ctxt.Diag("unsupported broadcast: %v", p)
  4084  		}
  4085  		evexB = 1
  4086  	case suffix.sae:
  4087  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4088  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  4089  		} else if !evex.SaeEnabled() {
  4090  			ctxt.Diag("unsupported SAE: %v", p)
  4091  		}
  4092  		evexB = 1
  4093  	}
  4094  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  4095  		evexV = 0
  4096  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  4097  		evexV = 0 // VSR selector 5th bit.
  4098  	}
  4099  	if k != nil {
  4100  		evexA = byte(reg[k.Reg])
  4101  	}
  4102  	// P2 = [z][L'L][b][V'][aaa]
  4103  	p2 := (evexZ << 7) |
  4104  		(evexLL << 5) |
  4105  		(evexB << 4) |
  4106  		(evexV << 3) |
  4107  		(evexA << 0)
  4108  
  4109  	const evexEscapeByte = 0x62
  4110  	ab.Put4(evexEscapeByte, p0, p1, p2)
  4111  	ab.Put1(evex.opcode)
  4112  }
  4113  
  4114  // Emit VEX prefix and opcode byte.
  4115  // The three addresses are the r/m, vvvv, and reg fields.
  4116  // The reg and rm arguments appear in the same order as the
  4117  // arguments to asmand, which typically follows the call to asmvex.
  4118  // The final two arguments are the VEX prefix (see encoding above)
  4119  // and the opcode byte.
  4120  // For details about vex prefix see:
  4121  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4122  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4123  	ab.vexflag = true
  4124  	rexR := 0
  4125  	if r != nil {
  4126  		rexR = regrex[r.Reg] & Rxr
  4127  	}
  4128  	rexB := 0
  4129  	rexX := 0
  4130  	if rm != nil {
  4131  		rexB = regrex[rm.Reg] & Rxb
  4132  		rexX = regrex[rm.Index] & Rxx
  4133  	}
  4134  	vexM := (vex >> 3) & 0x7
  4135  	vexWLP := vex & 0x87
  4136  	vexV := byte(0)
  4137  	if v != nil {
  4138  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4139  	}
  4140  	vexV ^= 0xF
  4141  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4142  		// Can use 2-byte encoding.
  4143  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4144  	} else {
  4145  		// Must use 3-byte encoding.
  4146  		ab.Put3(0xc4,
  4147  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4148  			vexV<<3|vexWLP,
  4149  		)
  4150  	}
  4151  	ab.Put1(opcode)
  4152  }
  4153  
  4154  // regIndex returns register index that fits in 5 bits.
  4155  //
  4156  //	R         : 3 bit | legacy instructions     | N/A
  4157  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4158  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4159  //
  4160  // Examples:
  4161  //	REG_Z30 => 30
  4162  //	REG_X15 => 15
  4163  //	REG_R9  => 9
  4164  //	REG_AX  => 0
  4165  //
  4166  func regIndex(r int16) int {
  4167  	lower3bits := reg[r]
  4168  	high4bit := regrex[r] & Rxr << 1
  4169  	high5bit := regrex[r] & RxrEvex << 0
  4170  	return lower3bits | high4bit | high5bit
  4171  }
  4172  
  4173  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4174  // Reports errors via ctxt.
  4175  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4176  	// If any pair of the index, mask, or destination registers
  4177  	// are the same, illegal instruction trap (#UD) is triggered.
  4178  	index := regIndex(p.GetFrom3().Index)
  4179  	mask := regIndex(p.From.Reg)
  4180  	dest := regIndex(p.To.Reg)
  4181  	if dest == mask || dest == index || mask == index {
  4182  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4183  		return false
  4184  	}
  4185  
  4186  	return true
  4187  }
  4188  
  4189  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4190  // Reports errors via ctxt.
  4191  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4192  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4193  	// register is the same as index vector in VSIB.
  4194  	index := regIndex(p.From.Index)
  4195  	dest := regIndex(p.To.Reg)
  4196  	if dest == index {
  4197  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4198  		return false
  4199  	}
  4200  
  4201  	return true
  4202  }
  4203  
  4204  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4205  	o := opindex[p.As&obj.AMask]
  4206  
  4207  	if o == nil {
  4208  		ctxt.Diag("asmins: missing op %v", p)
  4209  		return
  4210  	}
  4211  
  4212  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4213  		ab.Put1(byte(pre))
  4214  	}
  4215  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4216  		ab.Put1(byte(pre))
  4217  	}
  4218  
  4219  	// Checks to warn about instruction/arguments combinations that
  4220  	// will unconditionally trigger illegal instruction trap (#UD).
  4221  	switch p.As {
  4222  	case AVGATHERDPD,
  4223  		AVGATHERQPD,
  4224  		AVGATHERDPS,
  4225  		AVGATHERQPS,
  4226  		AVPGATHERDD,
  4227  		AVPGATHERQD,
  4228  		AVPGATHERDQ,
  4229  		AVPGATHERQQ:
  4230  		// AVX512 gather requires explicit K mask.
  4231  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4232  			if !avx512gatherValid(ctxt, p) {
  4233  				return
  4234  			}
  4235  		} else {
  4236  			if !avx2gatherValid(ctxt, p) {
  4237  				return
  4238  			}
  4239  		}
  4240  	}
  4241  
  4242  	if p.Ft == 0 {
  4243  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4244  	}
  4245  	if p.Tt == 0 {
  4246  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4247  	}
  4248  
  4249  	ft := int(p.Ft) * Ymax
  4250  	var f3t int
  4251  	tt := int(p.Tt) * Ymax
  4252  
  4253  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4254  	z := 0
  4255  	var a *obj.Addr
  4256  	var l int
  4257  	var op int
  4258  	var q *obj.Prog
  4259  	var r *obj.Reloc
  4260  	var rel obj.Reloc
  4261  	var v int64
  4262  
  4263  	args := make([]int, 0, argListMax)
  4264  	if ft != Ynone*Ymax {
  4265  		args = append(args, ft)
  4266  	}
  4267  	for i := range p.RestArgs {
  4268  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4269  	}
  4270  	if tt != Ynone*Ymax {
  4271  		args = append(args, tt)
  4272  	}
  4273  
  4274  	for _, yt := range o.ytab {
  4275  		// ytab matching is purely args-based,
  4276  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4277  		// add EVEX-only filter that will reject non-EVEX matches.
  4278  		//
  4279  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4280  		// Without this rule, operands will lead to VEX-encoded form
  4281  		// and produce "c5b15813" encoding.
  4282  		if !yt.match(args) {
  4283  			// "xo" is always zero for VEX/EVEX encoded insts.
  4284  			z += int(yt.zoffset) + xo
  4285  		} else {
  4286  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4287  				// Do not signal error and continue to search
  4288  				// for matching EVEX-encoded form.
  4289  				z += int(yt.zoffset)
  4290  				continue
  4291  			}
  4292  
  4293  			switch o.prefix {
  4294  			case Px1: // first option valid only in 32-bit mode
  4295  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4296  					z += int(yt.zoffset) + xo
  4297  					continue
  4298  				}
  4299  			case Pq: // 16 bit escape and opcode escape
  4300  				ab.Put2(Pe, Pm)
  4301  
  4302  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4303  				ab.rexflag |= Pw
  4304  				ab.Put2(Pe, Pm)
  4305  
  4306  			case Pq4: // 66 0F 38
  4307  				ab.Put3(0x66, 0x0F, 0x38)
  4308  
  4309  			case Pq4w: // 66 0F 38 + REX.W
  4310  				ab.rexflag |= Pw
  4311  				ab.Put3(0x66, 0x0F, 0x38)
  4312  
  4313  			case Pq5: // F3 0F 38
  4314  				ab.Put3(0xF3, 0x0F, 0x38)
  4315  
  4316  			case Pq5w: //  F3 0F 38 + REX.W
  4317  				ab.rexflag |= Pw
  4318  				ab.Put3(0xF3, 0x0F, 0x38)
  4319  
  4320  			case Pf2, // xmm opcode escape
  4321  				Pf3:
  4322  				ab.Put2(o.prefix, Pm)
  4323  
  4324  			case Pef3:
  4325  				ab.Put3(Pe, Pf3, Pm)
  4326  
  4327  			case Pfw: // xmm opcode escape + REX.W
  4328  				ab.rexflag |= Pw
  4329  				ab.Put2(Pf3, Pm)
  4330  
  4331  			case Pm: // opcode escape
  4332  				ab.Put1(Pm)
  4333  
  4334  			case Pe: // 16 bit escape
  4335  				ab.Put1(Pe)
  4336  
  4337  			case Pw: // 64-bit escape
  4338  				if ctxt.Arch.Family != sys.AMD64 {
  4339  					ctxt.Diag("asmins: illegal 64: %v", p)
  4340  				}
  4341  				ab.rexflag |= Pw
  4342  
  4343  			case Pw8: // 64-bit escape if z >= 8
  4344  				if z >= 8 {
  4345  					if ctxt.Arch.Family != sys.AMD64 {
  4346  						ctxt.Diag("asmins: illegal 64: %v", p)
  4347  					}
  4348  					ab.rexflag |= Pw
  4349  				}
  4350  
  4351  			case Pb: // botch
  4352  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4353  					goto bad
  4354  				}
  4355  				// NOTE(rsc): This is probably safe to do always,
  4356  				// but when enabled it chooses different encodings
  4357  				// than the old cmd/internal/obj/i386 code did,
  4358  				// which breaks our "same bits out" checks.
  4359  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4360  				// in the original obj/i386, and it would encode
  4361  				// (using a valid, shorter form) as 3c 00 if we enabled
  4362  				// the call to bytereg here.
  4363  				if ctxt.Arch.Family == sys.AMD64 {
  4364  					bytereg(&p.From, &p.Ft)
  4365  					bytereg(&p.To, &p.Tt)
  4366  				}
  4367  
  4368  			case P32: // 32 bit but illegal if 64-bit mode
  4369  				if ctxt.Arch.Family == sys.AMD64 {
  4370  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4371  				}
  4372  
  4373  			case Py: // 64-bit only, no prefix
  4374  				if ctxt.Arch.Family != sys.AMD64 {
  4375  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4376  				}
  4377  
  4378  			case Py1: // 64-bit only if z < 1, no prefix
  4379  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4380  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4381  				}
  4382  
  4383  			case Py3: // 64-bit only if z < 3, no prefix
  4384  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4385  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4386  				}
  4387  			}
  4388  
  4389  			if z >= len(o.op) {
  4390  				log.Fatalf("asmins bad table %v", p)
  4391  			}
  4392  			op = int(o.op[z])
  4393  			if op == 0x0f {
  4394  				ab.Put1(byte(op))
  4395  				z++
  4396  				op = int(o.op[z])
  4397  			}
  4398  
  4399  			switch yt.zcase {
  4400  			default:
  4401  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4402  				return
  4403  
  4404  			case Zpseudo:
  4405  				break
  4406  
  4407  			case Zlit:
  4408  				ab.PutOpBytesLit(z, &o.op)
  4409  
  4410  			case Zlitr_m:
  4411  				ab.PutOpBytesLit(z, &o.op)
  4412  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4413  
  4414  			case Zlitm_r:
  4415  				ab.PutOpBytesLit(z, &o.op)
  4416  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4417  
  4418  			case Zlit_m_r:
  4419  				ab.PutOpBytesLit(z, &o.op)
  4420  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4421  
  4422  			case Zmb_r:
  4423  				bytereg(&p.From, &p.Ft)
  4424  				fallthrough
  4425  
  4426  			case Zm_r:
  4427  				ab.Put1(byte(op))
  4428  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4429  
  4430  			case Z_m_r:
  4431  				ab.Put1(byte(op))
  4432  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4433  
  4434  			case Zm2_r:
  4435  				ab.Put2(byte(op), o.op[z+1])
  4436  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4437  
  4438  			case Zm_r_xm:
  4439  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4440  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4441  
  4442  			case Zm_r_xm_nr:
  4443  				ab.rexflag = 0
  4444  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4445  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4446  
  4447  			case Zm_r_i_xm:
  4448  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4449  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4450  				ab.Put1(byte(p.To.Offset))
  4451  
  4452  			case Zibm_r, Zibr_m:
  4453  				ab.PutOpBytesLit(z, &o.op)
  4454  				if yt.zcase == Zibr_m {
  4455  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4456  				} else {
  4457  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4458  				}
  4459  				switch {
  4460  				default:
  4461  					ab.Put1(byte(p.From.Offset))
  4462  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4463  					ab.PutInt16(int16(p.From.Offset))
  4464  				case yt.args[0] == Yi32:
  4465  					ab.PutInt32(int32(p.From.Offset))
  4466  				}
  4467  
  4468  			case Zaut_r:
  4469  				ab.Put1(0x8d) // leal
  4470  				if p.From.Type != obj.TYPE_ADDR {
  4471  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4472  				}
  4473  				p.From.Type = obj.TYPE_MEM
  4474  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4475  				p.From.Type = obj.TYPE_ADDR
  4476  
  4477  			case Zm_o:
  4478  				ab.Put1(byte(op))
  4479  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4480  
  4481  			case Zr_m:
  4482  				ab.Put1(byte(op))
  4483  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4484  
  4485  			case Zvex:
  4486  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4487  
  4488  			case Zvex_rm_v_r:
  4489  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4490  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4491  
  4492  			case Zvex_rm_v_ro:
  4493  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4494  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4495  
  4496  			case Zvex_i_rm_vo:
  4497  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4498  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4499  				ab.Put1(byte(p.From.Offset))
  4500  
  4501  			case Zvex_i_r_v:
  4502  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4503  				regnum := byte(0x7)
  4504  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4505  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4506  				} else {
  4507  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4508  				}
  4509  				ab.Put1(o.op[z+2] | regnum)
  4510  				ab.Put1(byte(p.From.Offset))
  4511  
  4512  			case Zvex_i_rm_v_r:
  4513  				imm, from, from3, to := unpackOps4(p)
  4514  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4515  				ab.asmand(ctxt, cursym, p, from, to)
  4516  				ab.Put1(byte(imm.Offset))
  4517  
  4518  			case Zvex_i_rm_r:
  4519  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4520  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4521  				ab.Put1(byte(p.From.Offset))
  4522  
  4523  			case Zvex_v_rm_r:
  4524  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4525  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4526  
  4527  			case Zvex_r_v_rm:
  4528  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4529  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4530  
  4531  			case Zvex_rm_r_vo:
  4532  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4533  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4534  
  4535  			case Zvex_i_r_rm:
  4536  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4537  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4538  				ab.Put1(byte(p.From.Offset))
  4539  
  4540  			case Zvex_hr_rm_v_r:
  4541  				hr, from, from3, to := unpackOps4(p)
  4542  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4543  				ab.asmand(ctxt, cursym, p, from, to)
  4544  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4545  
  4546  			case Zevex_k_rmo:
  4547  				ab.evex = newEVEXBits(z, &o.op)
  4548  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4549  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4550  
  4551  			case Zevex_i_rm_vo:
  4552  				ab.evex = newEVEXBits(z, &o.op)
  4553  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4554  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4555  				ab.Put1(byte(p.From.Offset))
  4556  
  4557  			case Zevex_i_rm_k_vo:
  4558  				imm, from, kmask, to := unpackOps4(p)
  4559  				ab.evex = newEVEXBits(z, &o.op)
  4560  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4561  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4562  				ab.Put1(byte(imm.Offset))
  4563  
  4564  			case Zevex_i_r_rm:
  4565  				ab.evex = newEVEXBits(z, &o.op)
  4566  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4567  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4568  				ab.Put1(byte(p.From.Offset))
  4569  
  4570  			case Zevex_i_r_k_rm:
  4571  				imm, from, kmask, to := unpackOps4(p)
  4572  				ab.evex = newEVEXBits(z, &o.op)
  4573  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4574  				ab.asmand(ctxt, cursym, p, to, from)
  4575  				ab.Put1(byte(imm.Offset))
  4576  
  4577  			case Zevex_i_rm_r:
  4578  				ab.evex = newEVEXBits(z, &o.op)
  4579  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4580  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4581  				ab.Put1(byte(p.From.Offset))
  4582  
  4583  			case Zevex_i_rm_k_r:
  4584  				imm, from, kmask, to := unpackOps4(p)
  4585  				ab.evex = newEVEXBits(z, &o.op)
  4586  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4587  				ab.asmand(ctxt, cursym, p, from, to)
  4588  				ab.Put1(byte(imm.Offset))
  4589  
  4590  			case Zevex_i_rm_v_r:
  4591  				imm, from, from3, to := unpackOps4(p)
  4592  				ab.evex = newEVEXBits(z, &o.op)
  4593  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4594  				ab.asmand(ctxt, cursym, p, from, to)
  4595  				ab.Put1(byte(imm.Offset))
  4596  
  4597  			case Zevex_i_rm_v_k_r:
  4598  				imm, from, from3, kmask, to := unpackOps5(p)
  4599  				ab.evex = newEVEXBits(z, &o.op)
  4600  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4601  				ab.asmand(ctxt, cursym, p, from, to)
  4602  				ab.Put1(byte(imm.Offset))
  4603  
  4604  			case Zevex_r_v_rm:
  4605  				ab.evex = newEVEXBits(z, &o.op)
  4606  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4607  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4608  
  4609  			case Zevex_rm_v_r:
  4610  				ab.evex = newEVEXBits(z, &o.op)
  4611  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4612  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4613  
  4614  			case Zevex_rm_k_r:
  4615  				ab.evex = newEVEXBits(z, &o.op)
  4616  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4617  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4618  
  4619  			case Zevex_r_k_rm:
  4620  				ab.evex = newEVEXBits(z, &o.op)
  4621  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4622  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4623  
  4624  			case Zevex_rm_v_k_r:
  4625  				from, from3, kmask, to := unpackOps4(p)
  4626  				ab.evex = newEVEXBits(z, &o.op)
  4627  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4628  				ab.asmand(ctxt, cursym, p, from, to)
  4629  
  4630  			case Zevex_r_v_k_rm:
  4631  				from, from3, kmask, to := unpackOps4(p)
  4632  				ab.evex = newEVEXBits(z, &o.op)
  4633  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4634  				ab.asmand(ctxt, cursym, p, to, from)
  4635  
  4636  			case Zr_m_xm:
  4637  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4638  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4639  
  4640  			case Zr_m_xm_nr:
  4641  				ab.rexflag = 0
  4642  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4643  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4644  
  4645  			case Zo_m:
  4646  				ab.Put1(byte(op))
  4647  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4648  
  4649  			case Zcallindreg:
  4650  				r = obj.Addrel(cursym)
  4651  				r.Off = int32(p.Pc)
  4652  				r.Type = objabi.R_CALLIND
  4653  				r.Siz = 0
  4654  				fallthrough
  4655  
  4656  			case Zo_m64:
  4657  				ab.Put1(byte(op))
  4658  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4659  
  4660  			case Zm_ibo:
  4661  				ab.Put1(byte(op))
  4662  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4663  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4664  
  4665  			case Zibo_m:
  4666  				ab.Put1(byte(op))
  4667  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4668  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4669  
  4670  			case Zibo_m_xm:
  4671  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4672  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4673  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4674  
  4675  			case Z_ib, Zib_:
  4676  				if yt.zcase == Zib_ {
  4677  					a = &p.From
  4678  				} else {
  4679  					a = &p.To
  4680  				}
  4681  				ab.Put1(byte(op))
  4682  				if p.As == AXABORT {
  4683  					ab.Put1(o.op[z+1])
  4684  				}
  4685  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4686  
  4687  			case Zib_rp:
  4688  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4689  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4690  
  4691  			case Zil_rp:
  4692  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4693  				ab.Put1(byte(op + reg[p.To.Reg]))
  4694  				if o.prefix == Pe {
  4695  					v = vaddr(ctxt, p, &p.From, nil)
  4696  					ab.PutInt16(int16(v))
  4697  				} else {
  4698  					ab.relput4(ctxt, cursym, p, &p.From)
  4699  				}
  4700  
  4701  			case Zo_iw:
  4702  				ab.Put1(byte(op))
  4703  				if p.From.Type != obj.TYPE_NONE {
  4704  					v = vaddr(ctxt, p, &p.From, nil)
  4705  					ab.PutInt16(int16(v))
  4706  				}
  4707  
  4708  			case Ziq_rp:
  4709  				v = vaddr(ctxt, p, &p.From, &rel)
  4710  				l = int(v >> 32)
  4711  				if l == 0 && rel.Siz != 8 {
  4712  					ab.rexflag &^= (0x40 | Rxw)
  4713  
  4714  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4715  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4716  					if rel.Type != 0 {
  4717  						r = obj.Addrel(cursym)
  4718  						*r = rel
  4719  						r.Off = int32(p.Pc + int64(ab.Len()))
  4720  					}
  4721  
  4722  					ab.PutInt32(int32(v))
  4723  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4724  					ab.Put1(0xc7)
  4725  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4726  
  4727  					ab.PutInt32(int32(v)) // need all 8
  4728  				} else {
  4729  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4730  					ab.Put1(byte(op + reg[p.To.Reg]))
  4731  					if rel.Type != 0 {
  4732  						r = obj.Addrel(cursym)
  4733  						*r = rel
  4734  						r.Off = int32(p.Pc + int64(ab.Len()))
  4735  					}
  4736  
  4737  					ab.PutInt64(v)
  4738  				}
  4739  
  4740  			case Zib_rr:
  4741  				ab.Put1(byte(op))
  4742  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4743  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4744  
  4745  			case Z_il, Zil_:
  4746  				if yt.zcase == Zil_ {
  4747  					a = &p.From
  4748  				} else {
  4749  					a = &p.To
  4750  				}
  4751  				ab.Put1(byte(op))
  4752  				if o.prefix == Pe {
  4753  					v = vaddr(ctxt, p, a, nil)
  4754  					ab.PutInt16(int16(v))
  4755  				} else {
  4756  					ab.relput4(ctxt, cursym, p, a)
  4757  				}
  4758  
  4759  			case Zm_ilo, Zilo_m:
  4760  				ab.Put1(byte(op))
  4761  				if yt.zcase == Zilo_m {
  4762  					a = &p.From
  4763  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4764  				} else {
  4765  					a = &p.To
  4766  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4767  				}
  4768  
  4769  				if o.prefix == Pe {
  4770  					v = vaddr(ctxt, p, a, nil)
  4771  					ab.PutInt16(int16(v))
  4772  				} else {
  4773  					ab.relput4(ctxt, cursym, p, a)
  4774  				}
  4775  
  4776  			case Zil_rr:
  4777  				ab.Put1(byte(op))
  4778  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4779  				if o.prefix == Pe {
  4780  					v = vaddr(ctxt, p, &p.From, nil)
  4781  					ab.PutInt16(int16(v))
  4782  				} else {
  4783  					ab.relput4(ctxt, cursym, p, &p.From)
  4784  				}
  4785  
  4786  			case Z_rp:
  4787  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4788  				ab.Put1(byte(op + reg[p.To.Reg]))
  4789  
  4790  			case Zrp_:
  4791  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4792  				ab.Put1(byte(op + reg[p.From.Reg]))
  4793  
  4794  			case Zcallcon, Zjmpcon:
  4795  				if yt.zcase == Zcallcon {
  4796  					ab.Put1(byte(op))
  4797  				} else {
  4798  					ab.Put1(o.op[z+1])
  4799  				}
  4800  				r = obj.Addrel(cursym)
  4801  				r.Off = int32(p.Pc + int64(ab.Len()))
  4802  				r.Type = objabi.R_PCREL
  4803  				r.Siz = 4
  4804  				r.Add = p.To.Offset
  4805  				ab.PutInt32(0)
  4806  
  4807  			case Zcallind:
  4808  				ab.Put2(byte(op), o.op[z+1])
  4809  				r = obj.Addrel(cursym)
  4810  				r.Off = int32(p.Pc + int64(ab.Len()))
  4811  				if ctxt.Arch.Family == sys.AMD64 {
  4812  					r.Type = objabi.R_PCREL
  4813  				} else {
  4814  					r.Type = objabi.R_ADDR
  4815  				}
  4816  				r.Siz = 4
  4817  				r.Add = p.To.Offset
  4818  				r.Sym = p.To.Sym
  4819  				ab.PutInt32(0)
  4820  
  4821  			case Zcall, Zcallduff:
  4822  				if p.To.Sym == nil {
  4823  					ctxt.Diag("call without target")
  4824  					ctxt.DiagFlush()
  4825  					log.Fatalf("bad code")
  4826  				}
  4827  
  4828  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4829  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4830  				}
  4831  
  4832  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4833  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4834  					// (the call jumps into the middle of the function).
  4835  					// This makes it possible to see call sites for duffcopy/duffzero in
  4836  					// BP-based profiling tools like Linux perf (which is the
  4837  					// whole point of maintaining frame pointers in Go).
  4838  					// MOVQ BP, -16(SP)
  4839  					// LEAQ -16(SP), BP
  4840  					ab.Put(bpduff1)
  4841  				}
  4842  				ab.Put1(byte(op))
  4843  				r = obj.Addrel(cursym)
  4844  				r.Off = int32(p.Pc + int64(ab.Len()))
  4845  				r.Sym = p.To.Sym
  4846  				r.Add = p.To.Offset
  4847  				r.Type = objabi.R_CALL
  4848  				r.Siz = 4
  4849  				ab.PutInt32(0)
  4850  
  4851  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4852  					// Pop BP pushed above.
  4853  					// MOVQ 0(BP), BP
  4854  					ab.Put(bpduff2)
  4855  				}
  4856  
  4857  			// TODO: jump across functions needs reloc
  4858  			case Zbr, Zjmp, Zloop:
  4859  				if p.As == AXBEGIN {
  4860  					ab.Put1(byte(op))
  4861  				}
  4862  				if p.To.Sym != nil {
  4863  					if yt.zcase != Zjmp {
  4864  						ctxt.Diag("branch to ATEXT")
  4865  						ctxt.DiagFlush()
  4866  						log.Fatalf("bad code")
  4867  					}
  4868  
  4869  					ab.Put1(o.op[z+1])
  4870  					r = obj.Addrel(cursym)
  4871  					r.Off = int32(p.Pc + int64(ab.Len()))
  4872  					r.Sym = p.To.Sym
  4873  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4874  					// it can point to a trampoline instead of the destination itself.
  4875  					r.Type = objabi.R_CALL
  4876  					r.Siz = 4
  4877  					ab.PutInt32(0)
  4878  					break
  4879  				}
  4880  
  4881  				// Assumes q is in this function.
  4882  				// TODO: Check in input, preserve in brchain.
  4883  
  4884  				// Fill in backward jump now.
  4885  				q = p.To.Target()
  4886  
  4887  				if q == nil {
  4888  					ctxt.Diag("jmp/branch/loop without target")
  4889  					ctxt.DiagFlush()
  4890  					log.Fatalf("bad code")
  4891  				}
  4892  
  4893  				if p.Back&branchBackwards != 0 {
  4894  					v = q.Pc - (p.Pc + 2)
  4895  					if v >= -128 && p.As != AXBEGIN {
  4896  						if p.As == AJCXZL {
  4897  							ab.Put1(0x67)
  4898  						}
  4899  						ab.Put2(byte(op), byte(v))
  4900  					} else if yt.zcase == Zloop {
  4901  						ctxt.Diag("loop too far: %v", p)
  4902  					} else {
  4903  						v -= 5 - 2
  4904  						if p.As == AXBEGIN {
  4905  							v--
  4906  						}
  4907  						if yt.zcase == Zbr {
  4908  							ab.Put1(0x0f)
  4909  							v--
  4910  						}
  4911  
  4912  						ab.Put1(o.op[z+1])
  4913  						ab.PutInt32(int32(v))
  4914  					}
  4915  
  4916  					break
  4917  				}
  4918  
  4919  				// Annotate target; will fill in later.
  4920  				p.Forwd = q.Rel
  4921  
  4922  				q.Rel = p
  4923  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4924  					if p.As == AJCXZL {
  4925  						ab.Put1(0x67)
  4926  					}
  4927  					ab.Put2(byte(op), 0)
  4928  				} else if yt.zcase == Zloop {
  4929  					ctxt.Diag("loop too far: %v", p)
  4930  				} else {
  4931  					if yt.zcase == Zbr {
  4932  						ab.Put1(0x0f)
  4933  					}
  4934  					ab.Put1(o.op[z+1])
  4935  					ab.PutInt32(0)
  4936  				}
  4937  
  4938  			case Zbyte:
  4939  				v = vaddr(ctxt, p, &p.From, &rel)
  4940  				if rel.Siz != 0 {
  4941  					rel.Siz = uint8(op)
  4942  					r = obj.Addrel(cursym)
  4943  					*r = rel
  4944  					r.Off = int32(p.Pc + int64(ab.Len()))
  4945  				}
  4946  
  4947  				ab.Put1(byte(v))
  4948  				if op > 1 {
  4949  					ab.Put1(byte(v >> 8))
  4950  					if op > 2 {
  4951  						ab.PutInt16(int16(v >> 16))
  4952  						if op > 4 {
  4953  							ab.PutInt32(int32(v >> 32))
  4954  						}
  4955  					}
  4956  				}
  4957  			}
  4958  
  4959  			return
  4960  		}
  4961  	}
  4962  	f3t = Ynone * Ymax
  4963  	if p.GetFrom3() != nil {
  4964  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4965  	}
  4966  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4967  		var pp obj.Prog
  4968  		var t []byte
  4969  		if p.As == mo[0].as {
  4970  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4971  				t = mo[0].op[:]
  4972  				switch mo[0].code {
  4973  				default:
  4974  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4975  
  4976  				case movLit:
  4977  					for z = 0; t[z] != 0; z++ {
  4978  						ab.Put1(t[z])
  4979  					}
  4980  
  4981  				case movRegMem:
  4982  					ab.Put1(t[0])
  4983  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4984  
  4985  				case movMemReg:
  4986  					ab.Put1(t[0])
  4987  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4988  
  4989  				case movRegMem2op: // r,m - 2op
  4990  					ab.Put2(t[0], t[1])
  4991  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4992  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4993  
  4994  				case movMemReg2op:
  4995  					ab.Put2(t[0], t[1])
  4996  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4997  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4998  
  4999  				case movFullPtr:
  5000  					if t[0] != 0 {
  5001  						ab.Put1(t[0])
  5002  					}
  5003  					switch p.To.Index {
  5004  					default:
  5005  						goto bad
  5006  
  5007  					case REG_DS:
  5008  						ab.Put1(0xc5)
  5009  
  5010  					case REG_SS:
  5011  						ab.Put2(0x0f, 0xb2)
  5012  
  5013  					case REG_ES:
  5014  						ab.Put1(0xc4)
  5015  
  5016  					case REG_FS:
  5017  						ab.Put2(0x0f, 0xb4)
  5018  
  5019  					case REG_GS:
  5020  						ab.Put2(0x0f, 0xb5)
  5021  					}
  5022  
  5023  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5024  
  5025  				case movDoubleShift:
  5026  					if t[0] == Pw {
  5027  						if ctxt.Arch.Family != sys.AMD64 {
  5028  							ctxt.Diag("asmins: illegal 64: %v", p)
  5029  						}
  5030  						ab.rexflag |= Pw
  5031  						t = t[1:]
  5032  					} else if t[0] == Pe {
  5033  						ab.Put1(Pe)
  5034  						t = t[1:]
  5035  					}
  5036  
  5037  					switch p.From.Type {
  5038  					default:
  5039  						goto bad
  5040  
  5041  					case obj.TYPE_CONST:
  5042  						ab.Put2(0x0f, t[0])
  5043  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5044  						ab.Put1(byte(p.From.Offset))
  5045  
  5046  					case obj.TYPE_REG:
  5047  						switch p.From.Reg {
  5048  						default:
  5049  							goto bad
  5050  
  5051  						case REG_CL, REG_CX:
  5052  							ab.Put2(0x0f, t[1])
  5053  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5054  						}
  5055  					}
  5056  
  5057  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5058  				// where you load the TLS base register into a register and then index off that
  5059  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5060  				// are handled in prefixof above and should not be listed here.
  5061  				case movTLSReg:
  5062  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5063  						ctxt.Diag("invalid load of TLS: %v", p)
  5064  					}
  5065  
  5066  					if ctxt.Arch.Family == sys.I386 {
  5067  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5068  						// where you load the TLS base register into a register and then index off that
  5069  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5070  						// are handled in prefixof above and should not be listed here.
  5071  						switch ctxt.Headtype {
  5072  						default:
  5073  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5074  
  5075  						case objabi.Hlinux, objabi.Hfreebsd:
  5076  							if ctxt.Flag_shared {
  5077  								// Note that this is not generating the same insns as the other cases.
  5078  								//     MOV TLS, dst
  5079  								// becomes
  5080  								//     call __x86.get_pc_thunk.dst
  5081  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5082  								// which is encoded as
  5083  								//     call __x86.get_pc_thunk.dst
  5084  								//     movq 0(dst), dst
  5085  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5086  								// is g, which we can't check here, but will when we assemble the second
  5087  								// instruction.
  5088  								dst := p.To.Reg
  5089  								ab.Put1(0xe8)
  5090  								r = obj.Addrel(cursym)
  5091  								r.Off = int32(p.Pc + int64(ab.Len()))
  5092  								r.Type = objabi.R_CALL
  5093  								r.Siz = 4
  5094  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5095  								ab.PutInt32(0)
  5096  
  5097  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5098  								r = obj.Addrel(cursym)
  5099  								r.Off = int32(p.Pc + int64(ab.Len()))
  5100  								r.Type = objabi.R_TLS_IE
  5101  								r.Siz = 4
  5102  								r.Add = 2
  5103  								ab.PutInt32(0)
  5104  							} else {
  5105  								// ELF TLS base is 0(GS).
  5106  								pp.From = p.From
  5107  
  5108  								pp.From.Type = obj.TYPE_MEM
  5109  								pp.From.Reg = REG_GS
  5110  								pp.From.Offset = 0
  5111  								pp.From.Index = REG_NONE
  5112  								pp.From.Scale = 0
  5113  								ab.Put2(0x65, // GS
  5114  									0x8B)
  5115  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5116  							}
  5117  						case objabi.Hplan9:
  5118  							pp.From = obj.Addr{}
  5119  							pp.From.Type = obj.TYPE_MEM
  5120  							pp.From.Name = obj.NAME_EXTERN
  5121  							pp.From.Sym = plan9privates
  5122  							pp.From.Offset = 0
  5123  							pp.From.Index = REG_NONE
  5124  							ab.Put1(0x8B)
  5125  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5126  
  5127  						case objabi.Hwindows:
  5128  							// Windows TLS base is always 0x14(FS).
  5129  							pp.From = p.From
  5130  
  5131  							pp.From.Type = obj.TYPE_MEM
  5132  							pp.From.Reg = REG_FS
  5133  							pp.From.Offset = 0x14
  5134  							pp.From.Index = REG_NONE
  5135  							pp.From.Scale = 0
  5136  							ab.Put2(0x64, // FS
  5137  								0x8B)
  5138  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5139  						}
  5140  						break
  5141  					}
  5142  
  5143  					switch ctxt.Headtype {
  5144  					default:
  5145  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5146  
  5147  					case objabi.Hlinux, objabi.Hfreebsd:
  5148  						if !ctxt.Flag_shared {
  5149  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5150  						}
  5151  						// Note that this is not generating the same insn as the other cases.
  5152  						//     MOV TLS, R_to
  5153  						// becomes
  5154  						//     movq g@gottpoff(%rip), R_to
  5155  						// which is encoded as
  5156  						//     movq 0(%rip), R_to
  5157  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5158  						// is g, which we can't check here, but will when we assemble the second
  5159  						// instruction.
  5160  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5161  
  5162  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5163  						r = obj.Addrel(cursym)
  5164  						r.Off = int32(p.Pc + int64(ab.Len()))
  5165  						r.Type = objabi.R_TLS_IE
  5166  						r.Siz = 4
  5167  						r.Add = -4
  5168  						ab.PutInt32(0)
  5169  
  5170  					case objabi.Hplan9:
  5171  						pp.From = obj.Addr{}
  5172  						pp.From.Type = obj.TYPE_MEM
  5173  						pp.From.Name = obj.NAME_EXTERN
  5174  						pp.From.Sym = plan9privates
  5175  						pp.From.Offset = 0
  5176  						pp.From.Index = REG_NONE
  5177  						ab.rexflag |= Pw
  5178  						ab.Put1(0x8B)
  5179  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5180  
  5181  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5182  						// TLS base is 0(FS).
  5183  						pp.From = p.From
  5184  
  5185  						pp.From.Type = obj.TYPE_MEM
  5186  						pp.From.Name = obj.NAME_NONE
  5187  						pp.From.Reg = REG_NONE
  5188  						pp.From.Offset = 0
  5189  						pp.From.Index = REG_NONE
  5190  						pp.From.Scale = 0
  5191  						ab.rexflag |= Pw
  5192  						ab.Put2(0x64, // FS
  5193  							0x8B)
  5194  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5195  
  5196  					case objabi.Hwindows:
  5197  						// Windows TLS base is always 0x28(GS).
  5198  						pp.From = p.From
  5199  
  5200  						pp.From.Type = obj.TYPE_MEM
  5201  						pp.From.Name = obj.NAME_NONE
  5202  						pp.From.Reg = REG_GS
  5203  						pp.From.Offset = 0x28
  5204  						pp.From.Index = REG_NONE
  5205  						pp.From.Scale = 0
  5206  						ab.rexflag |= Pw
  5207  						ab.Put2(0x65, // GS
  5208  							0x8B)
  5209  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5210  					}
  5211  				}
  5212  				return
  5213  			}
  5214  		}
  5215  	}
  5216  	goto bad
  5217  
  5218  bad:
  5219  	if ctxt.Arch.Family != sys.AMD64 {
  5220  		// here, the assembly has failed.
  5221  		// if it's a byte instruction that has
  5222  		// unaddressable registers, try to
  5223  		// exchange registers and reissue the
  5224  		// instruction with the operands renamed.
  5225  		pp := *p
  5226  
  5227  		unbytereg(&pp.From, &pp.Ft)
  5228  		unbytereg(&pp.To, &pp.Tt)
  5229  
  5230  		z := int(p.From.Reg)
  5231  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5232  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5233  			// For now, different to keep bit-for-bit compatibility.
  5234  			if ctxt.Arch.Family == sys.I386 {
  5235  				breg := byteswapreg(ctxt, &p.To)
  5236  				if breg != REG_AX {
  5237  					ab.Put1(0x87) // xchg lhs,bx
  5238  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5239  					subreg(&pp, z, breg)
  5240  					ab.doasm(ctxt, cursym, &pp)
  5241  					ab.Put1(0x87) // xchg lhs,bx
  5242  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5243  				} else {
  5244  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5245  					subreg(&pp, z, REG_AX)
  5246  					ab.doasm(ctxt, cursym, &pp)
  5247  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5248  				}
  5249  				return
  5250  			}
  5251  
  5252  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5253  				// We certainly don't want to exchange
  5254  				// with AX if the op is MUL or DIV.
  5255  				ab.Put1(0x87) // xchg lhs,bx
  5256  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5257  				subreg(&pp, z, REG_BX)
  5258  				ab.doasm(ctxt, cursym, &pp)
  5259  				ab.Put1(0x87) // xchg lhs,bx
  5260  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5261  			} else {
  5262  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5263  				subreg(&pp, z, REG_AX)
  5264  				ab.doasm(ctxt, cursym, &pp)
  5265  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5266  			}
  5267  			return
  5268  		}
  5269  
  5270  		z = int(p.To.Reg)
  5271  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5272  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5273  			// For now, different to keep bit-for-bit compatibility.
  5274  			if ctxt.Arch.Family == sys.I386 {
  5275  				breg := byteswapreg(ctxt, &p.From)
  5276  				if breg != REG_AX {
  5277  					ab.Put1(0x87) //xchg rhs,bx
  5278  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5279  					subreg(&pp, z, breg)
  5280  					ab.doasm(ctxt, cursym, &pp)
  5281  					ab.Put1(0x87) // xchg rhs,bx
  5282  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5283  				} else {
  5284  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5285  					subreg(&pp, z, REG_AX)
  5286  					ab.doasm(ctxt, cursym, &pp)
  5287  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5288  				}
  5289  				return
  5290  			}
  5291  
  5292  			if isax(&p.From) {
  5293  				ab.Put1(0x87) // xchg rhs,bx
  5294  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5295  				subreg(&pp, z, REG_BX)
  5296  				ab.doasm(ctxt, cursym, &pp)
  5297  				ab.Put1(0x87) // xchg rhs,bx
  5298  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5299  			} else {
  5300  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5301  				subreg(&pp, z, REG_AX)
  5302  				ab.doasm(ctxt, cursym, &pp)
  5303  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5304  			}
  5305  			return
  5306  		}
  5307  	}
  5308  
  5309  	ctxt.Diag("invalid instruction: %v", p)
  5310  }
  5311  
  5312  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5313  // which is not referenced in a.
  5314  // If a is empty, it returns BX to account for MULB-like instructions
  5315  // that might use DX and AX.
  5316  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5317  	cana, canb, canc, cand := true, true, true, true
  5318  	if a.Type == obj.TYPE_NONE {
  5319  		cana, cand = false, false
  5320  	}
  5321  
  5322  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5323  		switch a.Reg {
  5324  		case REG_NONE:
  5325  			cana, cand = false, false
  5326  		case REG_AX, REG_AL, REG_AH:
  5327  			cana = false
  5328  		case REG_BX, REG_BL, REG_BH:
  5329  			canb = false
  5330  		case REG_CX, REG_CL, REG_CH:
  5331  			canc = false
  5332  		case REG_DX, REG_DL, REG_DH:
  5333  			cand = false
  5334  		}
  5335  	}
  5336  
  5337  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5338  		switch a.Index {
  5339  		case REG_AX:
  5340  			cana = false
  5341  		case REG_BX:
  5342  			canb = false
  5343  		case REG_CX:
  5344  			canc = false
  5345  		case REG_DX:
  5346  			cand = false
  5347  		}
  5348  	}
  5349  
  5350  	switch {
  5351  	case cana:
  5352  		return REG_AX
  5353  	case canb:
  5354  		return REG_BX
  5355  	case canc:
  5356  		return REG_CX
  5357  	case cand:
  5358  		return REG_DX
  5359  	default:
  5360  		ctxt.Diag("impossible byte register")
  5361  		ctxt.DiagFlush()
  5362  		log.Fatalf("bad code")
  5363  		return 0
  5364  	}
  5365  }
  5366  
  5367  func isbadbyte(a *obj.Addr) bool {
  5368  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5369  }
  5370  
  5371  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5372  	ab.Reset()
  5373  
  5374  	ab.rexflag = 0
  5375  	ab.vexflag = false
  5376  	ab.evexflag = false
  5377  	mark := ab.Len()
  5378  	ab.doasm(ctxt, cursym, p)
  5379  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5380  		// as befits the whole approach of the architecture,
  5381  		// the rex prefix must appear before the first opcode byte
  5382  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5383  		// before the 0f opcode escape!), or it might be ignored.
  5384  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5385  		if ctxt.Arch.Family != sys.AMD64 {
  5386  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5387  		}
  5388  		n := ab.Len()
  5389  		var np int
  5390  		for np = mark; np < n; np++ {
  5391  			c := ab.At(np)
  5392  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5393  				break
  5394  			}
  5395  		}
  5396  		ab.Insert(np, byte(0x40|ab.rexflag))
  5397  	}
  5398  
  5399  	n := ab.Len()
  5400  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5401  		r := &cursym.R[i]
  5402  		if int64(r.Off) < p.Pc {
  5403  			break
  5404  		}
  5405  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5406  			r.Off++
  5407  		}
  5408  		if r.Type == objabi.R_PCREL {
  5409  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5410  				// PC-relative addressing is relative to the end of the instruction,
  5411  				// but the relocations applied by the linker are relative to the end
  5412  				// of the relocation. Because immediate instruction
  5413  				// arguments can follow the PC-relative memory reference in the
  5414  				// instruction encoding, the two may not coincide. In this case,
  5415  				// adjust addend so that linker can keep relocating relative to the
  5416  				// end of the relocation.
  5417  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5418  			} else if ctxt.Arch.Family == sys.I386 {
  5419  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5420  				// assumes that the previous instruction loaded the PC of the end
  5421  				// of that instruction into CX, so the adjustment is relative to
  5422  				// that.
  5423  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5424  			}
  5425  		}
  5426  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5427  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5428  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5429  		}
  5430  
  5431  	}
  5432  }
  5433  
  5434  // unpackOps4 extracts 4 operands from p.
  5435  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5436  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5437  }
  5438  
  5439  // unpackOps5 extracts 5 operands from p.
  5440  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5441  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5442  }
  5443  

View as plain text