...

Source file src/github.com/twitchyliquid64/golang-asm/obj/x86/asm6.go

Documentation: github.com/twitchyliquid64/golang-asm/obj/x86

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"github.com/twitchyliquid64/golang-asm/obj"
    35  	"github.com/twitchyliquid64/golang-asm/objabi"
    36  	"github.com/twitchyliquid64/golang-asm/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var (
    44  	plan9privates *obj.LSym // NOTE(review): presumably the Plan 9 "privates" symbol; assigned outside this view - confirm
    45  	deferreturn   *obj.LSym // NOTE(review): presumably the symbol for the runtime's deferreturn; assigned outside this view - confirm
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  //
    63  const (
    64  	loopAlign  = 16 // loop entries would be aligned to this many bytes
    65  	maxLoopPad = 0  // maximum NOP padding per loop entry; 0 disables alignment
    66  )
    67  
    68  // Bit flags that are used to express jump target properties.
    69  const (
    70  	// branchBackwards marks targets that are located behind.
    71  	// Used to express jumps to loop headers.
    72  	branchBackwards = (1 << iota)
    73  	// branchShort marks branches whose target is close,
    74  	// with an offset in the -128..127 range.
    75  	branchShort
    76  	// branchLoopHead marks loop entry.
    77  	// Used to insert padding for misaligned loops.
    78  	branchLoopHead
    79  )
    80  
    81  // opBytes holds optab encoding bytes.
    82  // Each ytab line reserves a fixed number of bytes in this array.
    83  //
    84  // The size should be the minimal number of bytes that
    85  // is enough to hold the biggest optab op line.
    86  type opBytes [31]uint8
    87  
    88  type Optab struct { // one row of optab: how a single instruction may be encoded
    89  	as     obj.As // instruction; optab is searched for the entry matching p.As
    90  	ytab   []ytab // accepted operand-class combinations, scanned in order
    91  	prefix uint8 // one of the P* constants; controls the prefix bytes to emit
    92  	op     opBytes // opcode bytes; each ytab line owns a fixed-size run of them
    93  }
    94  
    95  type movtab struct { // NOTE(review): table row type; the table and its users are outside this view
    96  	as   obj.As // instruction the row applies to
    97  	ft   uint8 // NOTE(review): presumably the Y class of the from operand - confirm
    98  	f3t  uint8 // NOTE(review): presumably the Y class of a third operand - confirm
    99  	tt   uint8 // NOTE(review): presumably the Y class of the to operand - confirm
   100  	code uint8 // NOTE(review): presumably selects the encoding routine - confirm
   101  	op   [4]uint8 // NOTE(review): presumably opcode bytes, at most four - confirm
   102  }
   103  
   104  const ( // operand classes (Y types); a pair of classes indexes ycover as from*Ymax+to
   105  	Yxxx = iota
   106  	Ynone
   107  	Yi0 // $0
   108  	Yi1 // $1
   109  	Yu2 // $x, x fits in uint2
   110  	Yi8 // $x, x fits in int8
   111  	Yu8 // $x, x fits in uint8
   112  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   113  	Ys32 // signed 32-bit constant
   114  	Yi32
   115  	Yi64
   116  	Yiauto
   117  	Yal
   118  	Ycl
   119  	Yax
   120  	Ycx
   121  	Yrb
   122  	Yrl
   123  	Yrl32 // Yrl on 32-bit system
   124  	Yrf
   125  	Yf0
   126  	Yrx
   127  	Ymb
   128  	Yml
   129  	Ym
   130  	Ybr
   131  	Ycs
   132  	Yss
   133  	Yds
   134  	Yes
   135  	Yfs
   136  	Ygs
   137  	Ygdtr
   138  	Yidtr
   139  	Yldtr
   140  	Ymsw
   141  	Ytask
   142  	Ycr0
   143  	Ycr1
   144  	Ycr2
   145  	Ycr3
   146  	Ycr4
   147  	Ycr5
   148  	Ycr6
   149  	Ycr7
   150  	Ycr8
   151  	Ydr0
   152  	Ydr1
   153  	Ydr2
   154  	Ydr3
   155  	Ydr4
   156  	Ydr5
   157  	Ydr6
   158  	Ydr7
   159  	Ytr0
   160  	Ytr1
   161  	Ytr2
   162  	Ytr3
   163  	Ytr4
   164  	Ytr5
   165  	Ytr6
   166  	Ytr7
   167  	Ymr
   168  	Ymm
   169  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   170  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   171  	Yxr           // X0..X15
   172  	YxrEvex       // X0..X31
   173  	Yxm
   174  	YxmEvex       // YxrEvex+Ym
   175  	Yxvm          // VSIB vector array; vm32x/vm64x
   176  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   177  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   178  	Yyr           // Y0..Y15
   179  	YyrEvex       // Y0..Y31
   180  	Yym
   181  	YymEvex   // YyrEvex+Ym
   182  	Yyvm      // VSIB vector array; vm32y/vm64y
   183  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   184  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   185  	Yzr       // Z0..Z31
   186  	Yzm       // Yzr+Ym
   187  	Yzvm      // VSIB vector array; vm32z/vm64z
   188  	Yk0       // K0
   189  	Yknot0    // K1..K7; write mask
   190  	Yk        // K0..K7; used for KOP
   191  	Ykm       // Yk+Ym; used for KOP
   192  	Ytls
   193  	Ytextsize
   194  	Yindir
   195  	Ymax // number of operand classes; ycover holds Ymax*Ymax entries
   196  )
   197  
   198  const ( // encoding cases (Z types); the first field of each ytab line, switched on as yt.zcase in doasm
   199  	Zxxx = iota
   200  	Zlit
   201  	Zlitm_r
   202  	Zlitr_m
   203  	Zlit_m_r
   204  	Z_rp
   205  	Zbr
   206  	Zcall
   207  	Zcallcon
   208  	Zcallduff
   209  	Zcallind
   210  	Zcallindreg
   211  	Zib_
   212  	Zib_rp
   213  	Zibo_m // opcode byte, then the encoded memory/register operand, then an 8-bit immediate
   214  	Zibo_m_xm
   215  	Zil_
   216  	Zil_rp
   217  	Ziq_rp
   218  	Zilo_m // like Zibo_m but with a 32-bit immediate
   219  	Zjmp
   220  	Zjmpcon
   221  	Zloop
   222  	Zo_iw
   223  	Zm_o
   224  	Zm_r
   225  	Z_m_r
   226  	Zm2_r
   227  	Zm_r_xm
   228  	Zm_r_i_xm
   229  	Zm_r_xm_nr
   230  	Zr_m_xm_nr
   231  	Zibm_r // mmx1,mmx2/mem64,imm8
   232  	Zibr_m
   233  	Zmb_r
   234  	Zaut_r
   235  	Zo_m
   236  	Zo_m64
   237  	Zpseudo
   238  	Zr_m
   239  	Zr_m_xm
   240  	Zrp_
   241  	Z_ib
   242  	Z_il
   243  	Zm_ibo
   244  	Zm_ilo
   245  	Zib_rr
   246  	Zil_rr
   247  	Zbyte
   248  
   249  	Zvex_rm_v_r
   250  	Zvex_rm_v_ro
   251  	Zvex_r_v_rm
   252  	Zvex_i_rm_vo
   253  	Zvex_v_rm_r
   254  	Zvex_i_rm_r
   255  	Zvex_i_r_v
   256  	Zvex_i_rm_v_r
   257  	Zvex
   258  	Zvex_rm_r_vo
   259  	Zvex_i_r_rm
   260  	Zvex_hr_rm_v_r
   261  
   262  	Zevex_first // NOTE(review): presumably a sentinel preceding the EVEX cases - confirm against users
   263  	Zevex_i_r_k_rm
   264  	Zevex_i_r_rm
   265  	Zevex_i_rm_k_r
   266  	Zevex_i_rm_k_vo
   267  	Zevex_i_rm_r
   268  	Zevex_i_rm_v_k_r
   269  	Zevex_i_rm_v_r
   270  	Zevex_i_rm_vo
   271  	Zevex_k_rmo
   272  	Zevex_r_k_rm
   273  	Zevex_r_v_k_rm
   274  	Zevex_r_v_rm
   275  	Zevex_rm_k_r
   276  	Zevex_rm_v_k_r
   277  	Zevex_rm_v_r
   278  	Zevex_last // NOTE(review): presumably a sentinel following the EVEX cases - confirm against users
   279  
   280  	Zmax
   281  )
   282  
   283  const ( // P*: values for the Optab prefix field; Rx*: prefix extension bits
   284  	Px   = 0
   285  	Px1  = 1    // symbolic; exact value doesn't matter
   286  	P32  = 0x32 // 32-bit only
   287  	Pe   = 0x66 // operand escape
   288  	Pm   = 0x0f // 2byte opcode escape
   289  	Pq   = 0xff // both escapes: 66 0f
   290  	Pb   = 0xfe // byte operands
   291  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   292  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   293  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   294  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   295  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   296  	Pq4w = 0x69 // Pq4 with Rex.w: 66 0F 38
   297  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   298  	Pq5w = 0x6b // Pq5 with Rex.w: F3 0F 38
   299  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   300  	Pw   = 0x48 // Rex.w
   301  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   302  	Py   = 0x80 // defaults to 64-bit mode
   303  	Py1  = 0x81 // symbolic; exact value doesn't matter
   304  	Py3  = 0x83 // symbolic; exact value doesn't matter
   305  	Pavx = 0x84 // symbolic; exact value doesn't matter
   306  
   307  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   308  	Rxw     = 1 << 3 // =1, 64-bit operand size
   309  	Rxr     = 1 << 2 // extend modrm reg
   310  	Rxx     = 1 << 1 // extend sib index
   311  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   312  )
   313  
   314  const (
   315  	// Encoding for VEX prefix in tables.
   316  	// The P, L, and W fields are chosen to match
   317  	// their eventual locations in the VEX prefix bytes.
   318  
   319  	// As laid out below, the fields occupy disjoint bits of one byte:
   320  	// P in bits 0-1, L in bit 2, M in bits 3-4, avxEscape in bit 6,
   321  	// and W in bit 7.
   322  
   323  	// Using spare bit to make leading [E]VEX encoding byte different from
   324  	// 0x0f even if all other VEX fields are 0.
   325  	avxEscape = 1 << 6
   326  
   327  	// P field - 2 bits
   328  	vex66 = 1 << 0
   329  	vexF3 = 2 << 0
   330  	vexF2 = 3 << 0
   331  	// L field - 1 bit
   332  	vexLZ  = 0 << 2
   333  	vexLIG = 0 << 2
   334  	vex128 = 0 << 2
   335  	vex256 = 1 << 2
   336  	// W field - 1 bit
   337  	vexWIG = 0 << 7
   338  	vexW0  = 0 << 7
   339  	vexW1  = 1 << 7
   340  	// M field - 5 bits, but mostly reserved; we can store up to 3
   341  	vex0F   = 1 << 3
   342  	vex0F38 = 2 << 3
   343  	vex0F3A = 3 << 3
   344  )
   345  
   346  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means class a also counts as class b; set up in instinit
   347  
   348  var reg [MAXREG]int // NOTE(review): presumably per-register encoding values, filled outside this view - confirm
   349  
   350  var regrex [MAXREG + 1]int // NOTE(review): presumably per-register REX bits, filled outside this view - confirm
   351  
   352  var ynone = []ytab{ // no operands; used by e.g. AAAA, ACLC, ACMPSB in optab
   353  	{Zlit, 1, argList{}},
   354  }
   355  
   356  var ytext = []ytab{ // both forms are Zpseudo
   357  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   358  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   359  }
   360  
   361  var ynop = []ytab{ // every form is Zpseudo, with no operand or a single operand
   362  	{Zpseudo, 0, argList{}},
   363  	{Zpseudo, 0, argList{Yiauto}},
   364  	{Zpseudo, 0, argList{Yml}},
   365  	{Zpseudo, 0, argList{Yrf}},
   366  	{Zpseudo, 0, argList{Yxr}},
   367  	{Zpseudo, 0, argList{Yiauto}},
   368  	{Zpseudo, 0, argList{Yml}},
   369  	{Zpseudo, 0, argList{Yrf}},
   370  	{Zpseudo, 1, argList{Yxr}},
   371  }
   372  
   373  var yfuncdata = []ytab{
   374  	{Zpseudo, 0, argList{Yi32, Ym}},
   375  }
   376  
   377  var ypcdata = []ytab{
   378  	{Zpseudo, 0, argList{Yi32, Yi32}},
   379  }
   380  
   381  var yxorb = []ytab{ // used by AADCB, AADDB and AANDB in optab
   382  	{Zib_, 1, argList{Yi32, Yal}},
   383  	{Zibo_m, 2, argList{Yi32, Ymb}},
   384  	{Zr_m, 1, argList{Yrb, Ymb}},
   385  	{Zm_r, 1, argList{Ymb, Yrb}},
   386  }
   387  
   388  var yaddl = []ytab{ // used by the L, Q and W forms of AADC, AADD and AAND in optab
   389  	{Zibo_m, 2, argList{Yi8, Yml}},
   390  	{Zil_, 1, argList{Yi32, Yax}},
   391  	{Zilo_m, 2, argList{Yi32, Yml}},
   392  	{Zr_m, 1, argList{Yrl, Yml}},
   393  	{Zm_r, 1, argList{Yml, Yrl}},
   394  }
   395  
   396  var yincl = []ytab{
   397  	{Z_rp, 1, argList{Yrl}},
   398  	{Zo_m, 2, argList{Yml}},
   399  }
   400  
   401  var yincq = []ytab{
   402  	{Zo_m, 2, argList{Yml}},
   403  }
   404  
   405  var ycmpb = []ytab{ // used by ACMPB in optab
   406  	{Z_ib, 1, argList{Yal, Yi32}},
   407  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   408  	{Zm_r, 1, argList{Ymb, Yrb}},
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  }
   411  
   412  var ycmpl = []ytab{ // used by ACMPL and ACMPQ in optab
   413  	{Zm_ibo, 2, argList{Yml, Yi8}},
   414  	{Z_il, 1, argList{Yax, Yi32}},
   415  	{Zm_ilo, 2, argList{Yml, Yi32}},
   416  	{Zm_r, 1, argList{Yml, Yrl}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
   420  var yshb = []ytab{
   421  	{Zo_m, 2, argList{Yi1, Ymb}},
   422  	{Zibo_m, 2, argList{Yu8, Ymb}},
   423  	{Zo_m, 2, argList{Ycx, Ymb}},
   424  }
   425  
   426  var yshl = []ytab{
   427  	{Zo_m, 2, argList{Yi1, Yml}},
   428  	{Zibo_m, 2, argList{Yu8, Yml}},
   429  	{Zo_m, 2, argList{Ycl, Yml}},
   430  	{Zo_m, 2, argList{Ycx, Yml}},
   431  }
   432  
   433  var ytestl = []ytab{
   434  	{Zil_, 1, argList{Yi32, Yax}},
   435  	{Zilo_m, 2, argList{Yi32, Yml}},
   436  	{Zr_m, 1, argList{Yrl, Yml}},
   437  	{Zm_r, 1, argList{Yml, Yrl}},
   438  }
   439  
   440  var ymovb = []ytab{
   441  	{Zr_m, 1, argList{Yrb, Ymb}},
   442  	{Zm_r, 1, argList{Ymb, Yrb}},
   443  	{Zib_rp, 1, argList{Yi32, Yrb}},
   444  	{Zibo_m, 2, argList{Yi32, Ymb}},
   445  }
   446  
   447  var ybtl = []ytab{ // used by the ABT, ABTC, ABTR and ABTS entries in optab
   448  	{Zibo_m, 2, argList{Yi8, Yml}},
   449  	{Zr_m, 1, argList{Yrl, Yml}},
   450  }
   451  
   452  var ymovw = []ytab{
   453  	{Zr_m, 1, argList{Yrl, Yml}},
   454  	{Zm_r, 1, argList{Yml, Yrl}},
   455  	{Zil_rp, 1, argList{Yi32, Yrl}},
   456  	{Zilo_m, 2, argList{Yi32, Yml}},
   457  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   458  }
   459  
   460  var ymovl = []ytab{
   461  	{Zr_m, 1, argList{Yrl, Yml}},
   462  	{Zm_r, 1, argList{Yml, Yrl}},
   463  	{Zil_rp, 1, argList{Yi32, Yrl}},
   464  	{Zilo_m, 2, argList{Yi32, Yml}},
   465  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   466  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   467  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   468  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   469  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   470  }
   471  
   472  var yret = []ytab{ // no operand, or a single Yi32 constant
   473  	{Zo_iw, 1, argList{}},
   474  	{Zo_iw, 1, argList{Yi32}},
   475  }
   476  
   477  var ymovq = []ytab{ // trailing comments give the prefix/opcode each form is expected to pair with
   478  	// valid in 32-bit mode
   479  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   480  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   481  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   482  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   483  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   484  
   485  	// valid only in 64-bit mode, usually with 64-bit prefix
   486  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   487  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   488  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   489  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   490  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   491  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   492  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   493  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   494  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   495  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   496  }
   497  
   498  var ymovbe = []ytab{
   499  	{Zlitm_r, 3, argList{Ym, Yrl}},
   500  	{Zlitr_m, 3, argList{Yrl, Ym}},
   501  }
   502  
   503  var ym_rl = []ytab{
   504  	{Zm_r, 1, argList{Ym, Yrl}},
   505  }
   506  
   507  var yrl_m = []ytab{ // used by ABOUNDL and ABOUNDW in optab
   508  	{Zr_m, 1, argList{Yrl, Ym}},
   509  }
   510  
   511  var ymb_rl = []ytab{
   512  	{Zmb_r, 1, argList{Ymb, Yrl}},
   513  }
   514  
   515  var yml_rl = []ytab{ // used by the ACMOV*, ABSF*, ABSR*, AADCX* and AADOX* entries in optab
   516  	{Zm_r, 1, argList{Yml, Yrl}},
   517  }
   518  
   519  var yrl_ml = []ytab{ // used by AARPL in optab
   520  	{Zr_m, 1, argList{Yrl, Yml}},
   521  }
   522  
   523  var yml_mb = []ytab{
   524  	{Zr_m, 1, argList{Yrb, Ymb}},
   525  	{Zm_r, 1, argList{Ymb, Yrb}},
   526  }
   527  
   528  var yrb_mb = []ytab{
   529  	{Zr_m, 1, argList{Yrb, Ymb}},
   530  }
   531  
   532  var yxchg = []ytab{
   533  	{Z_rp, 1, argList{Yax, Yrl}},
   534  	{Zrp_, 1, argList{Yrl, Yax}},
   535  	{Zr_m, 1, argList{Yrl, Yml}},
   536  	{Zm_r, 1, argList{Yml, Yrl}},
   537  }
   538  
   539  var ydivl = []ytab{
   540  	{Zm_o, 2, argList{Yml}},
   541  }
   542  
   543  var ydivb = []ytab{
   544  	{Zm_o, 2, argList{Ymb}},
   545  }
   546  
   547  var yimul = []ytab{
   548  	{Zm_o, 2, argList{Yml}},
   549  	{Zib_rr, 1, argList{Yi8, Yrl}},
   550  	{Zil_rr, 1, argList{Yi32, Yrl}},
   551  	{Zm_r, 2, argList{Yml, Yrl}},
   552  }
   553  
   554  var yimul3 = []ytab{ // three-operand forms: constant, Yml, Yrl
   555  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   556  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   557  }
   558  
   559  var ybyte = []ytab{ // used by ABYTE in optab
   560  	{Zbyte, 1, argList{Yi64}},
   561  }
   562  
   563  var yin = []ytab{
   564  	{Zib_, 1, argList{Yi32}},
   565  	{Zlit, 1, argList{}},
   566  }
   567  
   568  var yint = []ytab{
   569  	{Zib_, 1, argList{Yi32}},
   570  }
   571  
   572  var ypushl = []ytab{
   573  	{Zrp_, 1, argList{Yrl}},
   574  	{Zm_o, 2, argList{Ym}},
   575  	{Zib_, 1, argList{Yi8}},
   576  	{Zil_, 1, argList{Yi32}},
   577  }
   578  
   579  var ypopl = []ytab{
   580  	{Z_rp, 1, argList{Yrl}},
   581  	{Zo_m, 2, argList{Ym}},
   582  }
   583  
   584  var ywrfsbase = []ytab{
   585  	{Zm_o, 2, argList{Yrl}},
   586  }
   587  
   588  var yrdrand = []ytab{
   589  	{Zo_m, 2, argList{Yrl}},
   590  }
   591  
   592  var yclflush = []ytab{ // used by ACLDEMOTE, ACLFLUSH, ACLFLUSHOPT and ACLWB in optab
   593  	{Zo_m, 2, argList{Ym}},
   594  }
   595  
   596  var ybswap = []ytab{ // used by ABSWAPL and ABSWAPQ in optab
   597  	{Z_rp, 2, argList{Yrl}},
   598  }
   599  
   600  var yscond = []ytab{
   601  	{Zo_m, 2, argList{Ymb}},
   602  }
   603  
   604  var yjcond = []ytab{ // a Ybr target, optionally preceded by a $0 or $1 constant
   605  	{Zbr, 0, argList{Ybr}},
   606  	{Zbr, 0, argList{Yi0, Ybr}},
   607  	{Zbr, 1, argList{Yi1, Ybr}},
   608  }
   609  
   610  var yloop = []ytab{
   611  	{Zloop, 1, argList{Ybr}},
   612  }
   613  
   614  var ycall = []ytab{ // used by obj.ACALL in optab
   615  	{Zcallindreg, 0, argList{Yml}},
   616  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   617  	{Zcallind, 2, argList{Yindir}},
   618  	{Zcall, 0, argList{Ybr}},
   619  	{Zcallcon, 1, argList{Yi32}},
   620  }
   621  
   622  var yduff = []ytab{
   623  	{Zcallduff, 1, argList{Yi32}},
   624  }
   625  
   626  var yjmp = []ytab{
   627  	{Zo_m64, 2, argList{Yml}},
   628  	{Zjmp, 0, argList{Ybr}},
   629  	{Zjmpcon, 1, argList{Yi32}},
   630  }
   631  
   632  var yfmvd = []ytab{ // forms pairing Yf0 with a Ym or Yrf operand, in either direction
   633  	{Zm_o, 2, argList{Ym, Yf0}},
   634  	{Zo_m, 2, argList{Yf0, Ym}},
   635  	{Zm_o, 2, argList{Yrf, Yf0}},
   636  	{Zo_m, 2, argList{Yf0, Yrf}},
   637  }
   638  
   639  var yfmvdp = []ytab{
   640  	{Zo_m, 2, argList{Yf0, Ym}},
   641  	{Zo_m, 2, argList{Yf0, Yrf}},
   642  }
   643  
   644  var yfmvf = []ytab{
   645  	{Zm_o, 2, argList{Ym, Yf0}},
   646  	{Zo_m, 2, argList{Yf0, Ym}},
   647  }
   648  
   649  var yfmvx = []ytab{
   650  	{Zm_o, 2, argList{Ym, Yf0}},
   651  }
   652  
   653  var yfmvp = []ytab{
   654  	{Zo_m, 2, argList{Yf0, Ym}},
   655  }
   656  
   657  var yfcmv = []ytab{
   658  	{Zm_o, 2, argList{Yrf, Yf0}},
   659  }
   660  
   661  var yfadd = []ytab{
   662  	{Zm_o, 2, argList{Ym, Yf0}},
   663  	{Zm_o, 2, argList{Yrf, Yf0}},
   664  	{Zo_m, 2, argList{Yf0, Yrf}},
   665  }
   666  
   667  var yfxch = []ytab{
   668  	{Zo_m, 2, argList{Yf0, Yrf}},
   669  	{Zm_o, 2, argList{Yrf, Yf0}},
   670  }
   671  
   672  var ycompp = []ytab{
   673  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   674  }
   675  
   676  var ystsw = []ytab{
   677  	{Zo_m, 2, argList{Ym}},
   678  	{Zlit, 1, argList{Yax}},
   679  }
   680  
   681  var ysvrs_mo = []ytab{ // single Ym operand, encoded with Zm_o
   682  	{Zm_o, 2, argList{Ym}},
   683  }
   684  
   685  // unaryDst version of "ysvrs_mo": the same Ym operand, encoded with Zo_m.
   686  var ysvrs_om = []ytab{
   687  	{Zo_m, 2, argList{Ym}},
   688  }
   689  
   690  var ymm = []ytab{ // one form for Ymm/Ymr operands and one for Yxm/Yxr operands
   691  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   692  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   693  }
   694  
   695  var yxm = []ytab{ // used by e.g. AADDPD, AADDSS, AANDPS in optab
   696  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   697  }
   698  
   699  var yxm_q4 = []ytab{
   700  	{Zm_r, 1, argList{Yxm, Yxr}},
   701  }
   702  
   703  var yxcvm1 = []ytab{
   704  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   705  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   706  }
   707  
   708  var yxcvm2 = []ytab{
   709  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   710  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   711  }
   712  
   713  var yxr = []ytab{
   714  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   715  }
   716  
   717  var yxr_ml = []ytab{
   718  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   719  }
   720  
   721  var ymr = []ytab{
   722  	{Zm_r, 1, argList{Ymr, Ymr}},
   723  }
   724  
   725  var ymr_ml = []ytab{
   726  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   727  }
   728  
   729  var yxcmpi = []ytab{ // used by ACMPPD and ACMPPS in optab
   730  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   731  }
   732  
   733  var yxmov = []ytab{
   734  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   735  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   736  }
   737  
   738  var yxcvfl = []ytab{
   739  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   740  }
   741  
   742  var yxcvlf = []ytab{
   743  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   744  }
   745  
   746  var yxcvfq = []ytab{
   747  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   748  }
   749  
   750  var yxcvqf = []ytab{
   751  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   752  }
   753  
   754  var yps = []ytab{
   755  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   756  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   757  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   758  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   759  }
   760  
   761  var yxrrl = []ytab{
   762  	{Zm_r, 1, argList{Yxr, Yrl}},
   763  }
   764  
   765  var ymrxr = []ytab{
   766  	{Zm_r, 1, argList{Ymr, Yxr}},
   767  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   768  }
   769  
   770  var ymshuf = []ytab{
   771  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   772  }
   773  
   774  var ymshufb = []ytab{
   775  	{Zm2_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  // It should never have more than 1 entry,
   779  // because some optab entries use opcode sequences that
   780  // are longer than 2 bytes (zoffset=2 here),
   781  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   782  // to name a few.
   783  var yxshuf = []ytab{
   784  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   785  }
   786  
   787  var yextrw = []ytab{
   788  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   789  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   790  }
   791  
   792  var yextr = []ytab{
   793  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   794  }
   795  
   796  var yinsrw = []ytab{
   797  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   798  }
   799  
   800  var yinsr = []ytab{
   801  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   802  }
   803  
   804  var ypsdq = []ytab{
   805  	{Zibo_m, 2, argList{Yi8, Yxr}},
   806  }
   807  
   808  var ymskb = []ytab{
   809  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   810  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   811  }
   812  
   813  var ycrc32l = []ytab{
   814  	{Zlitm_r, 0, argList{Yml, Yrl}},
   815  }
   816  
   817  var ycrc32b = []ytab{
   818  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   819  }
   820  
   821  var yprefetch = []ytab{
   822  	{Zm_o, 2, argList{Ym}},
   823  }
   824  
   825  var yaes = []ytab{
   826  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   827  }
   828  
   829  var yxbegin = []ytab{
   830  	{Zjmp, 1, argList{Ybr}},
   831  }
   832  
   833  var yxabort = []ytab{
   834  	{Zib_, 1, argList{Yu8}},
   835  }
   836  
   837  var ylddqu = []ytab{
   838  	{Zm_r, 1, argList{Ym, Yxr}},
   839  }
   840  
   841  var ypalignr = []ytab{
   842  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   843  }
   844  
   845  var ysha256rnds2 = []ytab{ // first operand must be X0 (Yxr0)
   846  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
   847  }
   848  
   849  var yblendvpd = []ytab{ // first operand must be X0 (Yxr0)
   850  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
   851  }
   852  
   853  var ymmxmm0f38 = []ytab{
   854  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   855  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   856  }
   857  
   858  var yextractps = []ytab{ // immediate is restricted to Yu2 (fits in uint2)
   859  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   860  }
   861  
   862  var ysha1rnds4 = []ytab{ // immediate is restricted to Yu2 (fits in uint2)
   863  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
   864  }
   865  
   866  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   867  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   868  // to find the entry with the given p.As and then looks through the ytable for
   869  // that instruction (the second field in the optab struct) for a line whose
   870  // argList values match the Ytypes of the p.From and p.To operands.  The
   871  // function oclass computes the specific Ytype of an operand and then the set
   872  // of more general Ytypes that it satisfies is implied by the ycover table, set
   873  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   874  // from the more general 8-bit constants, but instinit says
   875  //
   876  //        ycover[Yi0*Ymax+Ys32] = 1
   877  //        ycover[Yi1*Ymax+Ys32] = 1
   878  //        ycover[Yi8*Ymax+Ys32] = 1
   879  //
   880  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   881  // if that's what an instruction can handle.
   882  //
   883  // In parallel with the scan through the ytable for the appropriate line, there
   884  // is a z pointer that starts out pointing at the opcode byte list (opBytes) in
   885  // the Optab struct.  With each step past a non-matching ytable line, z
   886  // advances by the 2nd entry in the line.  When a matching line is found, that
   887  // z pointer has the extra data to use in laying down the instruction bytes.
   888  // The actual bytes laid down are a function of the 1st entry in the line (that
   889  // is, the Ztype) and the z bytes.
   890  //
   891  // For example, let's look at AADDL.  The optab line says:
   892  //        {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //        var yaddl = []ytab{
   896  //                {Zibo_m, 2, argList{Yi8, Yml}},
   897  //                {Zil_, 1, argList{Yi32, Yax}},
   898  //                {Zilo_m, 2, argList{Yi32, Yml}},
   899  //                {Zr_m, 1, argList{Yrl, Yml}},
   900  //                {Zm_r, 1, argList{Yml, Yrl}},
   901  //        }
   902  //
   903  // so there are 5 possible types of ADDL instruction that can be laid down, and
   904  // possible states used to lay them down (Ztype and z pointer, assuming z
   905  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   906  //
   907  //        Yi8, Yml -> Zibo_m, z (0x83, 00)
   908  //        Yi32, Yax -> Zil_, z+2 (0x05)
   909  //        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   910  //        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   911  //        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   912  //
   913  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   914  // relatively straightforward as this program goes.
   915  //
   916  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   917  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   918  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   919  // Zilo_m is the same but a long (32-bit) immediate.
   920  var optab =
   921  //	as, ytab, andproto, opcode
   922  [...]Optab{
   923  	{obj.AXXX, nil, 0, opBytes{}},
   924  	{AAAA, ynone, P32, opBytes{0x37}},
   925  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   926  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   927  	{AAAS, ynone, P32, opBytes{0x3f}},
   928  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   929  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   930  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   933  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   934  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   935  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   936  	{AADDPD, yxm, Pq, opBytes{0x58}},
   937  	{AADDPS, yxm, Pm, opBytes{0x58}},
   938  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   939  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   940  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   941  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   942  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   943  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   944  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   945  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   946  	{AADJSP, nil, 0, opBytes{}},
   947  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   948  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   949  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   950  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   951  	{AANDPD, yxm, Pq, opBytes{0x54}},
   952  	{AANDPS, yxm, Pm, opBytes{0x54}},
   953  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   954  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   956  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   957  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   958  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   959  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   960  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   961  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   962  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   963  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   964  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   965  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   966  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   967  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   968  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   969  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   970  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   971  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   972  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   973  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   974  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   975  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   976  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   977  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   978  	{ABYTE, ybyte, Px, opBytes{1}},
   979  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   980  	{ACBW, ynone, Pe, opBytes{0x98}},
   981  	{ACDQ, ynone, Px, opBytes{0x99}},
   982  	{ACDQE, ynone, Pw, opBytes{0x98}},
   983  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   984  	{ACLC, ynone, Px, opBytes{0xf8}},
   985  	{ACLD, ynone, Px, opBytes{0xfc}},
   986  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   987  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   988  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   989  	{ACLI, ynone, Px, opBytes{0xfa}},
   990  	{ACLTS, ynone, Pm, opBytes{0x06}},
   991  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   992  	{ACMC, ynone, Px, opBytes{0xf5}},
   993  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   994  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   995  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   996  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   997  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   998  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
   999  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1000  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1001  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1002  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1003  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1004  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1005  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1006  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1007  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1008  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1009  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1010  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1011  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1012  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1013  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1014  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1015  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1016  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1017  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1018  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1019  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1020  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1021  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1022  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1023  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1024  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1025  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1026  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1027  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1028  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1029  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1030  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1031  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1032  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1033  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1034  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1035  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1036  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1037  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1038  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1039  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1040  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1041  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1042  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1043  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1044  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1045  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1046  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1047  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1048  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1049  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1050  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1051  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1052  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1054  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1055  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1056  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1057  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1058  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1059  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1060  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1061  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1062  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1063  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1064  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1065  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1066  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1067  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1068  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1069  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1070  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1071  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1072  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1073  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1074  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1075  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1076  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1077  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1078  	{ACWD, ynone, Pe, opBytes{0x99}},
  1079  	{ACWDE, ynone, Px, opBytes{0x98}},
  1080  	{ACQO, ynone, Pw, opBytes{0x99}},
  1081  	{ADAA, ynone, P32, opBytes{0x27}},
  1082  	{ADAS, ynone, P32, opBytes{0x2f}},
  1083  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1084  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1085  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1086  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1087  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1088  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1089  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1090  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1091  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1092  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1093  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1094  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1095  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1096  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1097  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1098  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1099  	{AENTER, nil, 0, opBytes{}}, // botch
  1100  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1101  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1102  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1103  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1104  	{AHLT, ynone, Px, opBytes{0xf4}},
  1105  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1106  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1107  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1108  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1109  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1110  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1114  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1115  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1117  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1118  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1119  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1120  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1121  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1122  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1123  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1124  	{AINSL, ynone, Px, opBytes{0x6d}},
  1125  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1126  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1127  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1128  	{AINT, yint, Px, opBytes{0xcd}},
  1129  	{AINTO, ynone, P32, opBytes{0xce}},
  1130  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1131  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1132  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1133  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1134  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1135  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1136  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1137  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1138  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1139  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1140  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1141  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1142  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1143  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1144  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1145  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1146  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1147  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1148  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1149  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1150  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1151  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1152  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1153  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1154  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1155  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1156  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1157  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1158  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1159  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1160  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1161  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1162  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1163  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1164  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1165  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1166  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1167  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1168  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1169  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1170  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1171  	{ALODSL, ynone, Px, opBytes{0xad}},
  1172  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1173  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1174  	{ALONG, ybyte, Px, opBytes{4}},
  1175  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1176  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1177  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1178  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1179  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1180  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1181  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1182  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1183  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1184  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1185  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1186  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1187  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1188  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1189  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1190  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1191  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1192  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1193  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1194  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1195  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1196  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1197  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1198  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1199  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1200  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1201  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1202  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1203  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1204  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1205  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1206  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1207  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1208  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1209  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1210  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1211  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1212  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1213  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1214  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1215  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1216  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1217  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1218  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1219  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1220  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1221  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1222  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1223  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1224  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1225  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1226  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1227  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1228  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1229  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1230  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1231  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1232  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1233  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1234  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1235  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1236  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1237  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1238  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1239  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1240  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1241  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1242  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1243  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1244  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1245  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1246  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1247  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1248  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1249  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1250  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1251  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1252  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1253  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1254  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1255  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1256  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1257  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1258  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1259  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1260  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1261  	{AORPD, yxm, Pq, opBytes{0x56}},
  1262  	{AORPS, yxm, Pm, opBytes{0x56}},
  1263  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1264  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1265  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1266  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1267  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1268  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1269  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1270  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1271  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1272  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1273  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1274  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1275  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1276  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1277  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1278  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1279  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1280  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1281  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1282  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1283  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1284  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1285  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1286  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1287  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1288  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1289  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1290  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1291  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1292  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1293  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1294  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1295  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1296  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1297  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1298  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1299  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1300  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1301  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1302  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1303  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1304  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1305  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1306  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1307  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1308  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1309  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1310  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1311  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1312  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1313  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1314  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1315  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1316  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1317  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1318  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1319  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1320  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1321  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1322  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1323  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1324  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1325  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1326  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1327  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1328  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1329  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1330  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1331  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1332  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1333  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1334  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1335  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1336  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1337  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1338  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1339  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1340  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1341  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1342  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1343  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1344  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1345  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1346  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1347  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1348  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1349  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1350  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1351  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1352  	{APOPAL, ynone, P32, opBytes{0x61}},
  1353  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1354  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1355  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1356  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1357  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1358  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1359  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1360  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1361  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1362  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1363  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1364  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1365  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1366  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1367  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1368  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1369  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1370  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1371  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1372  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1373  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1374  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1375  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1376  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1377  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1378  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1379  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1380  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1381  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1382  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1383  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1384  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1385  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1386  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1387  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1388  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1389  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1390  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1391  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1392  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1393  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1394  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1395  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1396  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1397  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1398  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1399  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1400  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1401  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1402  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1403  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1404  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1405  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1406  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1407  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1409  	{AQUAD, ybyte, Px, opBytes{8}},
  1410  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1411  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1412  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1413  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1415  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1416  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1417  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1418  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1419  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{AREP, ynone, Px, opBytes{0xf3}},
  1421  	{AREPN, ynone, Px, opBytes{0xf2}},
  1422  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1423  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1424  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1425  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1426  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1427  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1428  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1429  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1431  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1432  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1433  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1435  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1436  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1437  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1438  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1439  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1440  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1442  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1443  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1444  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1446  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1447  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1448  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1450  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1451  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1452  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1453  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1454  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1455  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1456  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1457  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1458  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1459  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1460  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1461  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1462  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1463  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1464  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1465  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1466  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1467  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1468  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1469  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1470  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1471  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1472  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1474  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1475  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1476  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1478  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1479  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1480  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1481  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1482  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1483  	{ASTC, ynone, Px, opBytes{0xf9}},
  1484  	{ASTD, ynone, Px, opBytes{0xfd}},
  1485  	{ASTI, ynone, Px, opBytes{0xfb}},
  1486  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1487  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1488  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1489  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1490  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1491  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1492  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1493  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1494  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1495  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1496  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1497  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1498  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1500  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1501  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1502  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1503  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1504  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1506  	{obj.ATEXT, ytext, Px, opBytes{}},
  1507  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1508  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1509  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1510  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1511  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1512  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1513  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1514  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1515  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1516  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1517  	{AWORD, ybyte, Px, opBytes{2}},
  1518  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1519  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1520  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1521  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1523  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1524  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1525  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1526  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1527  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1528  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1530  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1531  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1532  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1533  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1534  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1535  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1536  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1537  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1538  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1539  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1540  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1541  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1542  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1543  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1544  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1545  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1546  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1547  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1548  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1549  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1551  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1552  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1553  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1554  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1555  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1556  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1557  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1558  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1559  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1560  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1561  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1562  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1563  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1564  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1565  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1566  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1567  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1568  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1569  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1570  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1571  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1572  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1573  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1574  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1575  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1576  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1577  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1578  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1579  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1580  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1581  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1582  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1583  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1584  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1585  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1586  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1587  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1588  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1589  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1590  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1591  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1592  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1593  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1594  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1595  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1596  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1597  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1598  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1599  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1600  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1601  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1602  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1603  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1604  	{AFFREE, nil, 0, opBytes{}},
  1605  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1606  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1607  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1608  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1609  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1610  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1611  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1612  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1613  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1614  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1615  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1616  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1617  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1618  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1619  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1620  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1621  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1622  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1623  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1624  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1625  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1626  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1627  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1628  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1629  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1630  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1631  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1632  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1633  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1634  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1635  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1636  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1637  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1638  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1639  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1640  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1641  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1642  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1643  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1644  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1645  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1646  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1647  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1649  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1650  	{AINVD, ynone, Pm, opBytes{0x08}},
  1651  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1652  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1653  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1654  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1655  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1656  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1657  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1658  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1659  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1660  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1661  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1662  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1663  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1664  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1665  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1666  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1667  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1668  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1669  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1670  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1671  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1672  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1673  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1674  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1676  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1677  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1678  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1679  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1680  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1681  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1682  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1683  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1684  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1685  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1686  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1687  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1688  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1689  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1690  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1691  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1692  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1693  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1694  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1695  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1696  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1697  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1698  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1699  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1700  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1701  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1702  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1703  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1704  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1705  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1706  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1707  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1708  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1709  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1710  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1711  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1712  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1713  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1715  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1716  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1717  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1718  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1719  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1720  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1721  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1722  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1723  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1724  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1725  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1726  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1727  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1728  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1729  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1730  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1732  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1733  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1734  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1735  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1736  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1737  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1738  	{AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1739  	{AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1740  	{AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1741  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1742  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1743  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1744  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1745  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1746  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1747  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1748  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1749  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1750  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1751  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1752  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1753  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1754  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1755  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1756  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1757  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1758  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1759  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1760  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1761  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1762  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1763  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1764  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1765  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1766  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1767  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1768  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1769  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1770  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1771  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1772  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1773  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1774  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1775  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1776  
  1777  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1778  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1779  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1780  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1781  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1782  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1783  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1784  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1785  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1786  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1787  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1788  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1789  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1790  
  1791  	{obj.AEND, nil, 0, opBytes{}},
  1792  	{0, nil, 0, opBytes{}},
  1793  }
  1794  
// opindex maps an opcode, masked with obj.AMask, to its instruction table
// entry. It is filled in by instinit from avxOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1796  
  1797  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1798  // This happens on systems like Solaris that call .so functions instead of system calls.
  1799  // It does not seem to be necessary for any other systems. This is probably working
  1800  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1801  // what that bug is. And this does fix it.
  1802  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1803  	if ctxt.Headtype == objabi.Hsolaris {
  1804  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1805  		return strings.HasPrefix(s.Name, "libc_")
  1806  	}
  1807  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1808  }
  1809  
  1810  // single-instruction no-ops of various lengths.
  1811  // constructed by hand and disassembled with gdb to verify.
  1812  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1813  var nop = [][16]uint8{
  1814  	{0x90},
  1815  	{0x66, 0x90},
  1816  	{0x0F, 0x1F, 0x00},
  1817  	{0x0F, 0x1F, 0x40, 0x00},
  1818  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1819  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1820  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1821  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1823  }
  1824  
  1825  // Native Client rejects the repeated 0x66 prefix.
  1826  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1827  func fillnop(p []byte, n int) {
  1828  	var m int
  1829  
  1830  	for n > 0 {
  1831  		m = n
  1832  		if m > len(nop) {
  1833  			m = len(nop)
  1834  		}
  1835  		copy(p[:m], nop[m-1][:m])
  1836  		p = p[m:]
  1837  		n -= m
  1838  	}
  1839  }
  1840  
  1841  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1842  	s.Grow(int64(c) + int64(pad))
  1843  	fillnop(s.P[c:], int(pad))
  1844  	return c + pad
  1845  }
  1846  
  1847  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1848  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1849  		return l
  1850  	}
  1851  	return q
  1852  }
  1853  
  1854  // If the environment variable GOAMD64=alignedjumps the assembler will ensure that
  1855  // no standalone or macro-fused jump will straddle or end on a 32 byte boundary
  1856  // by inserting NOPs before the jumps
  1857  func isJump(p *obj.Prog) bool {
  1858  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1859  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1860  }
  1861  
  1862  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1863  // jump. Otherwise, nil is returned.
  1864  func lookForJCC(p *obj.Prog) *obj.Prog {
  1865  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1866  	var q *obj.Prog
  1867  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1868  	}
  1869  
  1870  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1871  		return nil
  1872  	}
  1873  
  1874  	switch q.As {
  1875  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1876  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1877  	default:
  1878  		return nil
  1879  	}
  1880  
  1881  	return q
  1882  }
  1883  
  1884  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1885  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1886  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1887  func fusedJump(p *obj.Prog) (bool, uint8) {
  1888  	var fusedSize uint8
  1889  
  1890  	// The first instruction in a macro fused pair may be preceeded by the LOCK prefix,
  1891  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1892  	// need to be careful to insert any padding before the locks rather than directly after them.
  1893  
  1894  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1895  		fusedSize += p.Isize
  1896  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1897  		}
  1898  		if p == nil {
  1899  			return false, 0
  1900  		}
  1901  	}
  1902  	if p.As == ALOCK {
  1903  		fusedSize += p.Isize
  1904  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1905  		}
  1906  		if p == nil {
  1907  			return false, 0
  1908  		}
  1909  	}
  1910  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1911  
  1912  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1913  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1914  
  1915  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1916  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1917  
  1918  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1919  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1920  
  1921  	if !cmpAddSub && !testAnd && !incDec {
  1922  		return false, 0
  1923  	}
  1924  
  1925  	if !incDec {
  1926  		var argOne obj.AddrType
  1927  		var argTwo obj.AddrType
  1928  		if cmp {
  1929  			argOne = p.From.Type
  1930  			argTwo = p.To.Type
  1931  		} else {
  1932  			argOne = p.To.Type
  1933  			argTwo = p.From.Type
  1934  		}
  1935  		if argOne == obj.TYPE_REG {
  1936  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1937  				return false, 0
  1938  			}
  1939  		} else if argOne == obj.TYPE_MEM {
  1940  			if argTwo != obj.TYPE_REG {
  1941  				return false, 0
  1942  			}
  1943  		} else {
  1944  			return false, 0
  1945  		}
  1946  	}
  1947  
  1948  	fusedSize += p.Isize
  1949  	jmp := lookForJCC(p)
  1950  	if jmp == nil {
  1951  		return false, 0
  1952  	}
  1953  
  1954  	fusedSize += jmp.Isize
  1955  
  1956  	if testAnd {
  1957  		return true, fusedSize
  1958  	}
  1959  
  1960  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1961  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1962  		return false, 0
  1963  	}
  1964  
  1965  	if cmpAddSub {
  1966  		return true, fusedSize
  1967  	}
  1968  
  1969  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1970  		return false, 0
  1971  	}
  1972  
  1973  	return true, fusedSize
  1974  }
  1975  
  1976  type padJumpsCtx int32
  1977  
  1978  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1979  	// Disable jump padding on 32 bit builds by settting
  1980  	// padJumps to 0.
  1981  	if ctxt.Arch.Family == sys.I386 {
  1982  		return padJumpsCtx(0)
  1983  	}
  1984  
  1985  	// Disable jump padding for hand written assembly code.
  1986  	if ctxt.IsAsm {
  1987  		return padJumpsCtx(0)
  1988  	}
  1989  
  1990  	if objabi.GOAMD64 != "alignedjumps" {
  1991  		return padJumpsCtx(0)
  1992  
  1993  	}
  1994  
  1995  	return padJumpsCtx(32)
  1996  }
  1997  
  1998  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1999  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  2000  // not cross or end on a 32 byte boundary.
  2001  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  2002  	if pjc == 0 {
  2003  		return c
  2004  	}
  2005  
  2006  	var toPad int32
  2007  	fj, fjSize := fusedJump(p)
  2008  	mask := int32(pjc - 1)
  2009  	if fj {
  2010  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2011  			toPad = int32(pjc) - (c & mask)
  2012  		}
  2013  	} else if isJump(p) {
  2014  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2015  			toPad = int32(pjc) - (c & mask)
  2016  		}
  2017  	}
  2018  	if toPad <= 0 {
  2019  		return c
  2020  	}
  2021  
  2022  	return noppad(ctxt, s, c, toPad)
  2023  }
  2024  
  2025  // reAssemble is called if an instruction's size changes during assembly. If
  2026  // it does and the instruction is a standalone or a macro-fused jump we need to
  2027  // reassemble.
  2028  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2029  	if pjc == 0 {
  2030  		return false
  2031  	}
  2032  
  2033  	fj, _ := fusedJump(p)
  2034  	return fj || isJump(p)
  2035  }
  2036  
// nopPad records a run of padding bytes emitted while assembling, so that
// span6 can afterwards splice a matching NOP Prog into the instruction list.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
  2041  
// span6 assembles the instructions of the function symbol s.
//
// It walks s.Func.Text, encodes every Prog into s.P, assigns each Prog its Pc
// and Isize, and finally stores the total size in s.Size. The whole function
// is assembled repeatedly until a pass completes without requesting another
// one; a new pass is requested when a short forward branch turns out to be out
// of range or when a size change affects jump padding. Padding that was
// inserted is spliced into the Prog list as NOP instructions afterwards.
//
// span6 returns immediately when s.P is already non-nil. newprog is only
// passed on to obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pre-pass: rewrite operands and pseudo-instructions before encoding.
	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch without a target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL/JMP through a register
		// becomes a direct branch to the runtime.retpoline symbol named
		// after that register. Memory operands cannot be converted and
		// are reported as errors.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already has branchShort set was marked above, so
		// presumably it was reached earlier in this walk (or is p itself):
		// the branch goes backwards and its target heads a loop.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current offset into s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Start every pass with empty relocations and code.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func.Text; p != nil; p = p.Link {
			c0 := c // offset before any padding, used to measure the pad
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the distance from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Too far for a one byte displacement: clear
						// the short flag and go around again.
						reAssemble = true
						q.Back ^= branchShort
					}

					// The displacement byte sits at offset 2 for
					// JCXZL and XBEGIN and at offset 1 otherwise.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32 bit displacement occupies the
					// last four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		// Abort if the passes do not settle.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Do not start another pass if this one reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging aid: dump the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil)
	}
}
  2232  
// instinit fills in the package-level tables the x86 assembler relies on:
//
//   - opindex, which maps a masked opcode to its entry in avxOptab or optab;
//   - ycover, a Ymax*Ymax matrix of operand classes;
//   - reg and regrex, which hold a number and a set of flag bits per register.
//
// It also looks up the "_privates" symbol on Plan 9. Calling it again after
// ycover has been populated is a no-op.
//
// NOTE(review): ycover[a*Ymax+b] != 0 presumably means that an operand of
// class a is accepted where class b is expected — confirm against the
// operand matching code.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both instruction tables by opcode. An opcode that is already
	// present is reported as a phase error.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to the entry whose opcode is zero.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// Set the diagonal: every operand class is related to itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// Immediate classes.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// General register classes.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register-or-memory classes.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Vector register and vector register-or-memory classes.
	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	// Mask register classes.
	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	// Vector-indexed memory classes.
	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// Fill reg and regrex. reg[i] stays -1 for any register not matched
	// below; otherwise it is the register's position within its group
	// reduced to three bits. regrex[i] collects the Rx* flag bits for
	// registers in the upper part of a group.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				// NOTE(review): 0x40 is presumably an otherwise
				// empty REX prefix that these byte registers
				// require — confirm.
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// The high byte registers are numbered starting at 4.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		// X, Y and Z registers follow one pattern: within 0-15 the upper
		// eight get Rxr|Rxx|Rxb, and 16-31 additionally get RxrEvex.
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		// Control registers 8-15 only get the Rxr bit; reg[i] is not
		// assigned for them here.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2466  
  2467  var isAndroid = objabi.GOOS == "android"
  2468  
  2469  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2470  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2471  		return 0
  2472  	}
  2473  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2474  		switch a.Reg {
  2475  		case REG_CS:
  2476  			return 0x2e
  2477  
  2478  		case REG_DS:
  2479  			return 0x3e
  2480  
  2481  		case REG_ES:
  2482  			return 0x26
  2483  
  2484  		case REG_FS:
  2485  			return 0x64
  2486  
  2487  		case REG_GS:
  2488  			return 0x65
  2489  
  2490  		case REG_TLS:
  2491  			// NOTE: Systems listed here should be only systems that
  2492  			// support direct TLS references like 8(TLS) implemented as
  2493  			// direct references from FS or GS. Systems that require
  2494  			// the initial-exec model, where you load the TLS base into
  2495  			// a register and then index from that register, do not reach
  2496  			// this code and should not be listed.
  2497  			if ctxt.Arch.Family == sys.I386 {
  2498  				switch ctxt.Headtype {
  2499  				default:
  2500  					if isAndroid {
  2501  						return 0x65 // GS
  2502  					}
  2503  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2504  
  2505  				case objabi.Hdarwin,
  2506  					objabi.Hdragonfly,
  2507  					objabi.Hfreebsd,
  2508  					objabi.Hnetbsd,
  2509  					objabi.Hopenbsd:
  2510  					return 0x65 // GS
  2511  				}
  2512  			}
  2513  
  2514  			switch ctxt.Headtype {
  2515  			default:
  2516  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2517  
  2518  			case objabi.Hlinux:
  2519  				if isAndroid {
  2520  					return 0x64 // FS
  2521  				}
  2522  
  2523  				if ctxt.Flag_shared {
  2524  					log.Fatalf("unknown TLS base register for linux with -shared")
  2525  				} else {
  2526  					return 0x64 // FS
  2527  				}
  2528  
  2529  			case objabi.Hdragonfly,
  2530  				objabi.Hfreebsd,
  2531  				objabi.Hnetbsd,
  2532  				objabi.Hopenbsd,
  2533  				objabi.Hsolaris:
  2534  				return 0x64 // FS
  2535  
  2536  			case objabi.Hdarwin:
  2537  				return 0x65 // GS
  2538  			}
  2539  		}
  2540  	}
  2541  
  2542  	if ctxt.Arch.Family == sys.I386 {
  2543  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2544  			// When building for inclusion into a shared library, an instruction of the form
  2545  			//     MOVL off(CX)(TLS*1), AX
  2546  			// becomes
  2547  			//     mov %gs:off(%ecx), %eax
  2548  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2549  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2550  			// a shared library the instruction it becomes
  2551  			//     mov 0x0(%ecx), %eax
  2552  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2553  			return 0x65 // GS
  2554  		}
  2555  		return 0
  2556  	}
  2557  
  2558  	switch a.Index {
  2559  	case REG_CS:
  2560  		return 0x2e
  2561  
  2562  	case REG_DS:
  2563  		return 0x3e
  2564  
  2565  	case REG_ES:
  2566  		return 0x26
  2567  
  2568  	case REG_TLS:
  2569  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2570  			// When building for inclusion into a shared library, an instruction of the form
  2571  			//     MOV off(CX)(TLS*1), AX
  2572  			// becomes
  2573  			//     mov %fs:off(%rcx), %rax
  2574  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2575  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2576  			// a shared library the instruction does not require a prefix.
  2577  			return 0x64
  2578  		}
  2579  
  2580  	case REG_FS:
  2581  		return 0x64
  2582  
  2583  	case REG_GS:
  2584  		return 0x65
  2585  	}
  2586  
  2587  	return 0
  2588  }
  2589  
  2590  // oclassRegList returns multisource operand class for addr.
  2591  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2592  	// TODO(quasilyte): when oclass register case is refactored into
  2593  	// lookup table, use it here to get register kind more easily.
  2594  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2595  
  2596  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2597  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2598  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2599  
  2600  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2601  	low := regIndex(int16(reg0))
  2602  	high := regIndex(int16(reg1))
  2603  
  2604  	if ctxt.Arch.Family == sys.I386 {
  2605  		if low >= 8 || high >= 8 {
  2606  			return Yxxx
  2607  		}
  2608  	}
  2609  
  2610  	switch high - low {
  2611  	case 3:
  2612  		switch {
  2613  		case regIsXmm(reg0) && regIsXmm(reg1):
  2614  			return YxrEvexMulti4
  2615  		case regIsYmm(reg0) && regIsYmm(reg1):
  2616  			return YyrEvexMulti4
  2617  		case regIsZmm(reg0) && regIsZmm(reg1):
  2618  			return YzrMulti4
  2619  		default:
  2620  			return Yxxx
  2621  		}
  2622  	default:
  2623  		return Yxxx
  2624  	}
  2625  }
  2626  
  2627  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2628  // For addr that is not V-mem returns (Yxxx, false).
  2629  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2630  	switch addr.Index {
  2631  	case REG_X0 + 0,
  2632  		REG_X0 + 1,
  2633  		REG_X0 + 2,
  2634  		REG_X0 + 3,
  2635  		REG_X0 + 4,
  2636  		REG_X0 + 5,
  2637  		REG_X0 + 6,
  2638  		REG_X0 + 7:
  2639  		return Yxvm, true
  2640  	case REG_X8 + 0,
  2641  		REG_X8 + 1,
  2642  		REG_X8 + 2,
  2643  		REG_X8 + 3,
  2644  		REG_X8 + 4,
  2645  		REG_X8 + 5,
  2646  		REG_X8 + 6,
  2647  		REG_X8 + 7:
  2648  		if ctxt.Arch.Family == sys.I386 {
  2649  			return Yxxx, true
  2650  		}
  2651  		return Yxvm, true
  2652  	case REG_X16 + 0,
  2653  		REG_X16 + 1,
  2654  		REG_X16 + 2,
  2655  		REG_X16 + 3,
  2656  		REG_X16 + 4,
  2657  		REG_X16 + 5,
  2658  		REG_X16 + 6,
  2659  		REG_X16 + 7,
  2660  		REG_X16 + 8,
  2661  		REG_X16 + 9,
  2662  		REG_X16 + 10,
  2663  		REG_X16 + 11,
  2664  		REG_X16 + 12,
  2665  		REG_X16 + 13,
  2666  		REG_X16 + 14,
  2667  		REG_X16 + 15:
  2668  		if ctxt.Arch.Family == sys.I386 {
  2669  			return Yxxx, true
  2670  		}
  2671  		return YxvmEvex, true
  2672  
  2673  	case REG_Y0 + 0,
  2674  		REG_Y0 + 1,
  2675  		REG_Y0 + 2,
  2676  		REG_Y0 + 3,
  2677  		REG_Y0 + 4,
  2678  		REG_Y0 + 5,
  2679  		REG_Y0 + 6,
  2680  		REG_Y0 + 7:
  2681  		return Yyvm, true
  2682  	case REG_Y8 + 0,
  2683  		REG_Y8 + 1,
  2684  		REG_Y8 + 2,
  2685  		REG_Y8 + 3,
  2686  		REG_Y8 + 4,
  2687  		REG_Y8 + 5,
  2688  		REG_Y8 + 6,
  2689  		REG_Y8 + 7:
  2690  		if ctxt.Arch.Family == sys.I386 {
  2691  			return Yxxx, true
  2692  		}
  2693  		return Yyvm, true
  2694  	case REG_Y16 + 0,
  2695  		REG_Y16 + 1,
  2696  		REG_Y16 + 2,
  2697  		REG_Y16 + 3,
  2698  		REG_Y16 + 4,
  2699  		REG_Y16 + 5,
  2700  		REG_Y16 + 6,
  2701  		REG_Y16 + 7,
  2702  		REG_Y16 + 8,
  2703  		REG_Y16 + 9,
  2704  		REG_Y16 + 10,
  2705  		REG_Y16 + 11,
  2706  		REG_Y16 + 12,
  2707  		REG_Y16 + 13,
  2708  		REG_Y16 + 14,
  2709  		REG_Y16 + 15:
  2710  		if ctxt.Arch.Family == sys.I386 {
  2711  			return Yxxx, true
  2712  		}
  2713  		return YyvmEvex, true
  2714  
  2715  	case REG_Z0 + 0,
  2716  		REG_Z0 + 1,
  2717  		REG_Z0 + 2,
  2718  		REG_Z0 + 3,
  2719  		REG_Z0 + 4,
  2720  		REG_Z0 + 5,
  2721  		REG_Z0 + 6,
  2722  		REG_Z0 + 7:
  2723  		return Yzvm, true
  2724  	case REG_Z8 + 0,
  2725  		REG_Z8 + 1,
  2726  		REG_Z8 + 2,
  2727  		REG_Z8 + 3,
  2728  		REG_Z8 + 4,
  2729  		REG_Z8 + 5,
  2730  		REG_Z8 + 6,
  2731  		REG_Z8 + 7,
  2732  		REG_Z8 + 8,
  2733  		REG_Z8 + 9,
  2734  		REG_Z8 + 10,
  2735  		REG_Z8 + 11,
  2736  		REG_Z8 + 12,
  2737  		REG_Z8 + 13,
  2738  		REG_Z8 + 14,
  2739  		REG_Z8 + 15,
  2740  		REG_Z8 + 16,
  2741  		REG_Z8 + 17,
  2742  		REG_Z8 + 18,
  2743  		REG_Z8 + 19,
  2744  		REG_Z8 + 20,
  2745  		REG_Z8 + 21,
  2746  		REG_Z8 + 22,
  2747  		REG_Z8 + 23:
  2748  		if ctxt.Arch.Family == sys.I386 {
  2749  			return Yxxx, true
  2750  		}
  2751  		return Yzvm, true
  2752  	}
  2753  
  2754  	return Yxxx, false
  2755  }
  2756  
  2757  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2758  	switch a.Type {
  2759  	case obj.TYPE_REGLIST:
  2760  		return oclassRegList(ctxt, a)
  2761  
  2762  	case obj.TYPE_NONE:
  2763  		return Ynone
  2764  
  2765  	case obj.TYPE_BRANCH:
  2766  		return Ybr
  2767  
  2768  	case obj.TYPE_INDIR:
  2769  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2770  			return Yindir
  2771  		}
  2772  		return Yxxx
  2773  
  2774  	case obj.TYPE_MEM:
  2775  		// Pseudo registers have negative index, but SP is
  2776  		// not pseudo on x86, hence REG_SP check is not redundant.
  2777  		if a.Index == REG_SP || a.Index < 0 {
  2778  			// Can't use FP/SB/PC/SP as the index register.
  2779  			return Yxxx
  2780  		}
  2781  
  2782  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2783  			return vmem
  2784  		}
  2785  
  2786  		if ctxt.Arch.Family == sys.AMD64 {
  2787  			switch a.Name {
  2788  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2789  				// Global variables can't use index registers and their
  2790  				// base register is %rip (%rip is encoded as REG_NONE).
  2791  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2792  					return Yxxx
  2793  				}
  2794  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2795  				// These names must have a base of SP.  The old compiler
  2796  				// uses 0 for the base register. SSA uses REG_SP.
  2797  				if a.Reg != REG_SP && a.Reg != 0 {
  2798  					return Yxxx
  2799  				}
  2800  			case obj.NAME_NONE:
  2801  				// everything is ok
  2802  			default:
  2803  				// unknown name
  2804  				return Yxxx
  2805  			}
  2806  		}
  2807  		return Ym
  2808  
  2809  	case obj.TYPE_ADDR:
  2810  		switch a.Name {
  2811  		case obj.NAME_GOTREF:
  2812  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2813  			return Yxxx
  2814  
  2815  		case obj.NAME_EXTERN,
  2816  			obj.NAME_STATIC:
  2817  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2818  				return Yi32
  2819  			}
  2820  			return Yiauto // use pc-relative addressing
  2821  
  2822  		case obj.NAME_AUTO,
  2823  			obj.NAME_PARAM:
  2824  			return Yiauto
  2825  		}
  2826  
  2827  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2828  		// and got Yi32 in an earlier version of this code.
  2829  		// Keep doing that until we fix yduff etc.
  2830  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2831  			return Yi32
  2832  		}
  2833  
  2834  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2835  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2836  		}
  2837  		fallthrough
  2838  
  2839  	case obj.TYPE_CONST:
  2840  		if a.Sym != nil {
  2841  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2842  		}
  2843  
  2844  		v := a.Offset
  2845  		if ctxt.Arch.Family == sys.I386 {
  2846  			v = int64(int32(v))
  2847  		}
  2848  		switch {
  2849  		case v == 0:
  2850  			return Yi0
  2851  		case v == 1:
  2852  			return Yi1
  2853  		case v >= 0 && v <= 3:
  2854  			return Yu2
  2855  		case v >= 0 && v <= 127:
  2856  			return Yu7
  2857  		case v >= 0 && v <= 255:
  2858  			return Yu8
  2859  		case v >= -128 && v <= 127:
  2860  			return Yi8
  2861  		}
  2862  		if ctxt.Arch.Family == sys.I386 {
  2863  			return Yi32
  2864  		}
  2865  		l := int32(v)
  2866  		if int64(l) == v {
  2867  			return Ys32 // can sign extend
  2868  		}
  2869  		if v>>32 == 0 {
  2870  			return Yi32 // unsigned
  2871  		}
  2872  		return Yi64
  2873  
  2874  	case obj.TYPE_TEXTSIZE:
  2875  		return Ytextsize
  2876  	}
  2877  
  2878  	if a.Type != obj.TYPE_REG {
  2879  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2880  		return Yxxx
  2881  	}
  2882  
  2883  	switch a.Reg {
  2884  	case REG_AL:
  2885  		return Yal
  2886  
  2887  	case REG_AX:
  2888  		return Yax
  2889  
  2890  		/*
  2891  			case REG_SPB:
  2892  		*/
  2893  	case REG_BPB,
  2894  		REG_SIB,
  2895  		REG_DIB,
  2896  		REG_R8B,
  2897  		REG_R9B,
  2898  		REG_R10B,
  2899  		REG_R11B,
  2900  		REG_R12B,
  2901  		REG_R13B,
  2902  		REG_R14B,
  2903  		REG_R15B:
  2904  		if ctxt.Arch.Family == sys.I386 {
  2905  			return Yxxx
  2906  		}
  2907  		fallthrough
  2908  
  2909  	case REG_DL,
  2910  		REG_BL,
  2911  		REG_AH,
  2912  		REG_CH,
  2913  		REG_DH,
  2914  		REG_BH:
  2915  		return Yrb
  2916  
  2917  	case REG_CL:
  2918  		return Ycl
  2919  
  2920  	case REG_CX:
  2921  		return Ycx
  2922  
  2923  	case REG_DX, REG_BX:
  2924  		return Yrx
  2925  
  2926  	case REG_R8, // not really Yrl
  2927  		REG_R9,
  2928  		REG_R10,
  2929  		REG_R11,
  2930  		REG_R12,
  2931  		REG_R13,
  2932  		REG_R14,
  2933  		REG_R15:
  2934  		if ctxt.Arch.Family == sys.I386 {
  2935  			return Yxxx
  2936  		}
  2937  		fallthrough
  2938  
  2939  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2940  		if ctxt.Arch.Family == sys.I386 {
  2941  			return Yrl32
  2942  		}
  2943  		return Yrl
  2944  
  2945  	case REG_F0 + 0:
  2946  		return Yf0
  2947  
  2948  	case REG_F0 + 1,
  2949  		REG_F0 + 2,
  2950  		REG_F0 + 3,
  2951  		REG_F0 + 4,
  2952  		REG_F0 + 5,
  2953  		REG_F0 + 6,
  2954  		REG_F0 + 7:
  2955  		return Yrf
  2956  
  2957  	case REG_M0 + 0,
  2958  		REG_M0 + 1,
  2959  		REG_M0 + 2,
  2960  		REG_M0 + 3,
  2961  		REG_M0 + 4,
  2962  		REG_M0 + 5,
  2963  		REG_M0 + 6,
  2964  		REG_M0 + 7:
  2965  		return Ymr
  2966  
  2967  	case REG_X0:
  2968  		return Yxr0
  2969  
  2970  	case REG_X0 + 1,
  2971  		REG_X0 + 2,
  2972  		REG_X0 + 3,
  2973  		REG_X0 + 4,
  2974  		REG_X0 + 5,
  2975  		REG_X0 + 6,
  2976  		REG_X0 + 7,
  2977  		REG_X0 + 8,
  2978  		REG_X0 + 9,
  2979  		REG_X0 + 10,
  2980  		REG_X0 + 11,
  2981  		REG_X0 + 12,
  2982  		REG_X0 + 13,
  2983  		REG_X0 + 14,
  2984  		REG_X0 + 15:
  2985  		return Yxr
  2986  
  2987  	case REG_X0 + 16,
  2988  		REG_X0 + 17,
  2989  		REG_X0 + 18,
  2990  		REG_X0 + 19,
  2991  		REG_X0 + 20,
  2992  		REG_X0 + 21,
  2993  		REG_X0 + 22,
  2994  		REG_X0 + 23,
  2995  		REG_X0 + 24,
  2996  		REG_X0 + 25,
  2997  		REG_X0 + 26,
  2998  		REG_X0 + 27,
  2999  		REG_X0 + 28,
  3000  		REG_X0 + 29,
  3001  		REG_X0 + 30,
  3002  		REG_X0 + 31:
  3003  		return YxrEvex
  3004  
  3005  	case REG_Y0 + 0,
  3006  		REG_Y0 + 1,
  3007  		REG_Y0 + 2,
  3008  		REG_Y0 + 3,
  3009  		REG_Y0 + 4,
  3010  		REG_Y0 + 5,
  3011  		REG_Y0 + 6,
  3012  		REG_Y0 + 7,
  3013  		REG_Y0 + 8,
  3014  		REG_Y0 + 9,
  3015  		REG_Y0 + 10,
  3016  		REG_Y0 + 11,
  3017  		REG_Y0 + 12,
  3018  		REG_Y0 + 13,
  3019  		REG_Y0 + 14,
  3020  		REG_Y0 + 15:
  3021  		return Yyr
  3022  
  3023  	case REG_Y0 + 16,
  3024  		REG_Y0 + 17,
  3025  		REG_Y0 + 18,
  3026  		REG_Y0 + 19,
  3027  		REG_Y0 + 20,
  3028  		REG_Y0 + 21,
  3029  		REG_Y0 + 22,
  3030  		REG_Y0 + 23,
  3031  		REG_Y0 + 24,
  3032  		REG_Y0 + 25,
  3033  		REG_Y0 + 26,
  3034  		REG_Y0 + 27,
  3035  		REG_Y0 + 28,
  3036  		REG_Y0 + 29,
  3037  		REG_Y0 + 30,
  3038  		REG_Y0 + 31:
  3039  		return YyrEvex
  3040  
  3041  	case REG_Z0 + 0,
  3042  		REG_Z0 + 1,
  3043  		REG_Z0 + 2,
  3044  		REG_Z0 + 3,
  3045  		REG_Z0 + 4,
  3046  		REG_Z0 + 5,
  3047  		REG_Z0 + 6,
  3048  		REG_Z0 + 7:
  3049  		return Yzr
  3050  
  3051  	case REG_Z0 + 8,
  3052  		REG_Z0 + 9,
  3053  		REG_Z0 + 10,
  3054  		REG_Z0 + 11,
  3055  		REG_Z0 + 12,
  3056  		REG_Z0 + 13,
  3057  		REG_Z0 + 14,
  3058  		REG_Z0 + 15,
  3059  		REG_Z0 + 16,
  3060  		REG_Z0 + 17,
  3061  		REG_Z0 + 18,
  3062  		REG_Z0 + 19,
  3063  		REG_Z0 + 20,
  3064  		REG_Z0 + 21,
  3065  		REG_Z0 + 22,
  3066  		REG_Z0 + 23,
  3067  		REG_Z0 + 24,
  3068  		REG_Z0 + 25,
  3069  		REG_Z0 + 26,
  3070  		REG_Z0 + 27,
  3071  		REG_Z0 + 28,
  3072  		REG_Z0 + 29,
  3073  		REG_Z0 + 30,
  3074  		REG_Z0 + 31:
  3075  		if ctxt.Arch.Family == sys.I386 {
  3076  			return Yxxx
  3077  		}
  3078  		return Yzr
  3079  
  3080  	case REG_K0:
  3081  		return Yk0
  3082  
  3083  	case REG_K0 + 1,
  3084  		REG_K0 + 2,
  3085  		REG_K0 + 3,
  3086  		REG_K0 + 4,
  3087  		REG_K0 + 5,
  3088  		REG_K0 + 6,
  3089  		REG_K0 + 7:
  3090  		return Yknot0
  3091  
  3092  	case REG_CS:
  3093  		return Ycs
  3094  	case REG_SS:
  3095  		return Yss
  3096  	case REG_DS:
  3097  		return Yds
  3098  	case REG_ES:
  3099  		return Yes
  3100  	case REG_FS:
  3101  		return Yfs
  3102  	case REG_GS:
  3103  		return Ygs
  3104  	case REG_TLS:
  3105  		return Ytls
  3106  
  3107  	case REG_GDTR:
  3108  		return Ygdtr
  3109  	case REG_IDTR:
  3110  		return Yidtr
  3111  	case REG_LDTR:
  3112  		return Yldtr
  3113  	case REG_MSW:
  3114  		return Ymsw
  3115  	case REG_TASK:
  3116  		return Ytask
  3117  
  3118  	case REG_CR + 0:
  3119  		return Ycr0
  3120  	case REG_CR + 1:
  3121  		return Ycr1
  3122  	case REG_CR + 2:
  3123  		return Ycr2
  3124  	case REG_CR + 3:
  3125  		return Ycr3
  3126  	case REG_CR + 4:
  3127  		return Ycr4
  3128  	case REG_CR + 5:
  3129  		return Ycr5
  3130  	case REG_CR + 6:
  3131  		return Ycr6
  3132  	case REG_CR + 7:
  3133  		return Ycr7
  3134  	case REG_CR + 8:
  3135  		return Ycr8
  3136  
  3137  	case REG_DR + 0:
  3138  		return Ydr0
  3139  	case REG_DR + 1:
  3140  		return Ydr1
  3141  	case REG_DR + 2:
  3142  		return Ydr2
  3143  	case REG_DR + 3:
  3144  		return Ydr3
  3145  	case REG_DR + 4:
  3146  		return Ydr4
  3147  	case REG_DR + 5:
  3148  		return Ydr5
  3149  	case REG_DR + 6:
  3150  		return Ydr6
  3151  	case REG_DR + 7:
  3152  		return Ydr7
  3153  
  3154  	case REG_TR + 0:
  3155  		return Ytr0
  3156  	case REG_TR + 1:
  3157  		return Ytr1
  3158  	case REG_TR + 2:
  3159  		return Ytr2
  3160  	case REG_TR + 3:
  3161  		return Ytr3
  3162  	case REG_TR + 4:
  3163  		return Ytr4
  3164  	case REG_TR + 5:
  3165  		return Ytr5
  3166  	case REG_TR + 6:
  3167  		return Ytr6
  3168  	case REG_TR + 7:
  3169  		return Ytr7
  3170  	}
  3171  
  3172  	return Yxxx
  3173  }
  3174  
  3175  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  3176  // and hold assembly state.
  3177  type AsmBuf struct {
  3178  	buf      [100]byte
  3179  	off      int
  3180  	rexflag  int
  3181  	vexflag  bool // Per inst: true for VEX-encoded
  3182  	evexflag bool // Per inst: true for EVEX-encoded
  3183  	rep      bool
  3184  	repn     bool
  3185  	lock     bool
  3186  
  3187  	evex evexBits // Initialized when evexflag is true
  3188  }
  3189  
  3190  // Put1 appends one byte to the end of the buffer.
  3191  func (ab *AsmBuf) Put1(x byte) {
  3192  	ab.buf[ab.off] = x
  3193  	ab.off++
  3194  }
  3195  
  3196  // Put2 appends two bytes to the end of the buffer.
  3197  func (ab *AsmBuf) Put2(x, y byte) {
  3198  	ab.buf[ab.off+0] = x
  3199  	ab.buf[ab.off+1] = y
  3200  	ab.off += 2
  3201  }
  3202  
  3203  // Put3 appends three bytes to the end of the buffer.
  3204  func (ab *AsmBuf) Put3(x, y, z byte) {
  3205  	ab.buf[ab.off+0] = x
  3206  	ab.buf[ab.off+1] = y
  3207  	ab.buf[ab.off+2] = z
  3208  	ab.off += 3
  3209  }
  3210  
  3211  // Put4 appends four bytes to the end of the buffer.
  3212  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3213  	ab.buf[ab.off+0] = x
  3214  	ab.buf[ab.off+1] = y
  3215  	ab.buf[ab.off+2] = z
  3216  	ab.buf[ab.off+3] = w
  3217  	ab.off += 4
  3218  }
  3219  
  3220  // PutInt16 writes v into the buffer using little-endian encoding.
  3221  func (ab *AsmBuf) PutInt16(v int16) {
  3222  	ab.buf[ab.off+0] = byte(v)
  3223  	ab.buf[ab.off+1] = byte(v >> 8)
  3224  	ab.off += 2
  3225  }
  3226  
  3227  // PutInt32 writes v into the buffer using little-endian encoding.
  3228  func (ab *AsmBuf) PutInt32(v int32) {
  3229  	ab.buf[ab.off+0] = byte(v)
  3230  	ab.buf[ab.off+1] = byte(v >> 8)
  3231  	ab.buf[ab.off+2] = byte(v >> 16)
  3232  	ab.buf[ab.off+3] = byte(v >> 24)
  3233  	ab.off += 4
  3234  }
  3235  
  3236  // PutInt64 writes v into the buffer using little-endian encoding.
  3237  func (ab *AsmBuf) PutInt64(v int64) {
  3238  	ab.buf[ab.off+0] = byte(v)
  3239  	ab.buf[ab.off+1] = byte(v >> 8)
  3240  	ab.buf[ab.off+2] = byte(v >> 16)
  3241  	ab.buf[ab.off+3] = byte(v >> 24)
  3242  	ab.buf[ab.off+4] = byte(v >> 32)
  3243  	ab.buf[ab.off+5] = byte(v >> 40)
  3244  	ab.buf[ab.off+6] = byte(v >> 48)
  3245  	ab.buf[ab.off+7] = byte(v >> 56)
  3246  	ab.off += 8
  3247  }
  3248  
  3249  // Put copies b into the buffer.
  3250  func (ab *AsmBuf) Put(b []byte) {
  3251  	copy(ab.buf[ab.off:], b)
  3252  	ab.off += len(b)
  3253  }
  3254  
  3255  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3256  // starting at specified offset (e.g. z counter value).
  3257  // Trailing 0 is not written.
  3258  //
  3259  // Intended to be used for literal Z cases.
  3260  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3261  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3262  	for int(op[offset]) != 0 {
  3263  		ab.Put1(byte(op[offset]))
  3264  		offset++
  3265  	}
  3266  }
  3267  
  3268  // Insert inserts b at offset i.
  3269  func (ab *AsmBuf) Insert(i int, b byte) {
  3270  	ab.off++
  3271  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3272  	ab.buf[i] = b
  3273  }
  3274  
  3275  // Last returns the byte at the end of the buffer.
  3276  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3277  
  3278  // Len returns the length of the buffer.
  3279  func (ab *AsmBuf) Len() int { return ab.off }
  3280  
  3281  // Bytes returns the contents of the buffer.
  3282  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3283  
  3284  // Reset empties the buffer.
  3285  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3286  
  3287  // At returns the byte at offset i.
  3288  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3289  
  3290  // asmidx emits SIB byte.
  3291  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3292  	var i int
  3293  
  3294  	// X/Y index register is used in VSIB.
  3295  	switch index {
  3296  	default:
  3297  		goto bad
  3298  
  3299  	case REG_NONE:
  3300  		i = 4 << 3
  3301  		goto bas
  3302  
  3303  	case REG_R8,
  3304  		REG_R9,
  3305  		REG_R10,
  3306  		REG_R11,
  3307  		REG_R12,
  3308  		REG_R13,
  3309  		REG_R14,
  3310  		REG_R15,
  3311  		REG_X8,
  3312  		REG_X9,
  3313  		REG_X10,
  3314  		REG_X11,
  3315  		REG_X12,
  3316  		REG_X13,
  3317  		REG_X14,
  3318  		REG_X15,
  3319  		REG_X16,
  3320  		REG_X17,
  3321  		REG_X18,
  3322  		REG_X19,
  3323  		REG_X20,
  3324  		REG_X21,
  3325  		REG_X22,
  3326  		REG_X23,
  3327  		REG_X24,
  3328  		REG_X25,
  3329  		REG_X26,
  3330  		REG_X27,
  3331  		REG_X28,
  3332  		REG_X29,
  3333  		REG_X30,
  3334  		REG_X31,
  3335  		REG_Y8,
  3336  		REG_Y9,
  3337  		REG_Y10,
  3338  		REG_Y11,
  3339  		REG_Y12,
  3340  		REG_Y13,
  3341  		REG_Y14,
  3342  		REG_Y15,
  3343  		REG_Y16,
  3344  		REG_Y17,
  3345  		REG_Y18,
  3346  		REG_Y19,
  3347  		REG_Y20,
  3348  		REG_Y21,
  3349  		REG_Y22,
  3350  		REG_Y23,
  3351  		REG_Y24,
  3352  		REG_Y25,
  3353  		REG_Y26,
  3354  		REG_Y27,
  3355  		REG_Y28,
  3356  		REG_Y29,
  3357  		REG_Y30,
  3358  		REG_Y31,
  3359  		REG_Z8,
  3360  		REG_Z9,
  3361  		REG_Z10,
  3362  		REG_Z11,
  3363  		REG_Z12,
  3364  		REG_Z13,
  3365  		REG_Z14,
  3366  		REG_Z15,
  3367  		REG_Z16,
  3368  		REG_Z17,
  3369  		REG_Z18,
  3370  		REG_Z19,
  3371  		REG_Z20,
  3372  		REG_Z21,
  3373  		REG_Z22,
  3374  		REG_Z23,
  3375  		REG_Z24,
  3376  		REG_Z25,
  3377  		REG_Z26,
  3378  		REG_Z27,
  3379  		REG_Z28,
  3380  		REG_Z29,
  3381  		REG_Z30,
  3382  		REG_Z31:
  3383  		if ctxt.Arch.Family == sys.I386 {
  3384  			goto bad
  3385  		}
  3386  		fallthrough
  3387  
  3388  	case REG_AX,
  3389  		REG_CX,
  3390  		REG_DX,
  3391  		REG_BX,
  3392  		REG_BP,
  3393  		REG_SI,
  3394  		REG_DI,
  3395  		REG_X0,
  3396  		REG_X1,
  3397  		REG_X2,
  3398  		REG_X3,
  3399  		REG_X4,
  3400  		REG_X5,
  3401  		REG_X6,
  3402  		REG_X7,
  3403  		REG_Y0,
  3404  		REG_Y1,
  3405  		REG_Y2,
  3406  		REG_Y3,
  3407  		REG_Y4,
  3408  		REG_Y5,
  3409  		REG_Y6,
  3410  		REG_Y7,
  3411  		REG_Z0,
  3412  		REG_Z1,
  3413  		REG_Z2,
  3414  		REG_Z3,
  3415  		REG_Z4,
  3416  		REG_Z5,
  3417  		REG_Z6,
  3418  		REG_Z7:
  3419  		i = reg[index] << 3
  3420  	}
  3421  
  3422  	switch scale {
  3423  	default:
  3424  		goto bad
  3425  
  3426  	case 1:
  3427  		break
  3428  
  3429  	case 2:
  3430  		i |= 1 << 6
  3431  
  3432  	case 4:
  3433  		i |= 2 << 6
  3434  
  3435  	case 8:
  3436  		i |= 3 << 6
  3437  	}
  3438  
  3439  bas:
  3440  	switch base {
  3441  	default:
  3442  		goto bad
  3443  
  3444  	case REG_NONE: // must be mod=00
  3445  		i |= 5
  3446  
  3447  	case REG_R8,
  3448  		REG_R9,
  3449  		REG_R10,
  3450  		REG_R11,
  3451  		REG_R12,
  3452  		REG_R13,
  3453  		REG_R14,
  3454  		REG_R15:
  3455  		if ctxt.Arch.Family == sys.I386 {
  3456  			goto bad
  3457  		}
  3458  		fallthrough
  3459  
  3460  	case REG_AX,
  3461  		REG_CX,
  3462  		REG_DX,
  3463  		REG_BX,
  3464  		REG_SP,
  3465  		REG_BP,
  3466  		REG_SI,
  3467  		REG_DI:
  3468  		i |= reg[base]
  3469  	}
  3470  
  3471  	ab.Put1(byte(i))
  3472  	return
  3473  
  3474  bad:
  3475  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3476  	ab.Put1(0)
  3477  }
  3478  
  3479  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3480  	var rel obj.Reloc
  3481  
  3482  	v := vaddr(ctxt, p, a, &rel)
  3483  	if rel.Siz != 0 {
  3484  		if rel.Siz != 4 {
  3485  			ctxt.Diag("bad reloc")
  3486  		}
  3487  		r := obj.Addrel(cursym)
  3488  		*r = rel
  3489  		r.Off = int32(p.Pc + int64(ab.Len()))
  3490  	}
  3491  
  3492  	ab.PutInt32(int32(v))
  3493  }
  3494  
  3495  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3496  	if r != nil {
  3497  		*r = obj.Reloc{}
  3498  	}
  3499  
  3500  	switch a.Name {
  3501  	case obj.NAME_STATIC,
  3502  		obj.NAME_GOTREF,
  3503  		obj.NAME_EXTERN:
  3504  		s := a.Sym
  3505  		if r == nil {
  3506  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3507  			log.Fatalf("reloc")
  3508  		}
  3509  
  3510  		if a.Name == obj.NAME_GOTREF {
  3511  			r.Siz = 4
  3512  			r.Type = objabi.R_GOTPCREL
  3513  		} else if useAbs(ctxt, s) {
  3514  			r.Siz = 4
  3515  			r.Type = objabi.R_ADDR
  3516  		} else {
  3517  			r.Siz = 4
  3518  			r.Type = objabi.R_PCREL
  3519  		}
  3520  
  3521  		r.Off = -1 // caller must fill in
  3522  		r.Sym = s
  3523  		r.Add = a.Offset
  3524  
  3525  		return 0
  3526  	}
  3527  
  3528  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3529  		if r == nil {
  3530  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3531  			log.Fatalf("reloc")
  3532  		}
  3533  
  3534  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3535  			r.Type = objabi.R_TLS_LE
  3536  			r.Siz = 4
  3537  			r.Off = -1 // caller must fill in
  3538  			r.Add = a.Offset
  3539  		}
  3540  		return 0
  3541  	}
  3542  
  3543  	return a.Offset
  3544  }
  3545  
  3546  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3547  	var base int
  3548  	var rel obj.Reloc
  3549  
  3550  	rex &= 0x40 | Rxr
  3551  	if a.Offset != int64(int32(a.Offset)) {
  3552  		// The rules are slightly different for 386 and AMD64,
  3553  		// mostly for historical reasons. We may unify them later,
  3554  		// but it must be discussed beforehand.
  3555  		//
  3556  		// For 64bit mode only LEAL is allowed to overflow.
  3557  		// It's how https://golang.org/cl/59630 made it.
  3558  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3559  		//
  3560  		// For 32bit mode rules are more permissive.
  3561  		// If offset fits uint32, it's permitted.
  3562  		// This is allowed for assembly that wants to use 32-bit hex
  3563  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3564  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3565  			(ctxt.Arch.Family != sys.AMD64 &&
  3566  				int64(uint32(a.Offset)) == a.Offset &&
  3567  				ab.rexflag&Rxw == 0)
  3568  		if !overflowOK {
  3569  			ctxt.Diag("offset too large in %s", p)
  3570  		}
  3571  	}
  3572  	v := int32(a.Offset)
  3573  	rel.Siz = 0
  3574  
  3575  	switch a.Type {
  3576  	case obj.TYPE_ADDR:
  3577  		if a.Name == obj.NAME_NONE {
  3578  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3579  		}
  3580  		if a.Index == REG_TLS {
  3581  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3582  		}
  3583  		goto bad
  3584  
  3585  	case obj.TYPE_REG:
  3586  		const regFirst = REG_AL
  3587  		const regLast = REG_Z31
  3588  		if a.Reg < regFirst || regLast < a.Reg {
  3589  			goto bad
  3590  		}
  3591  		if v != 0 {
  3592  			goto bad
  3593  		}
  3594  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3595  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3596  		return
  3597  	}
  3598  
  3599  	if a.Type != obj.TYPE_MEM {
  3600  		goto bad
  3601  	}
  3602  
  3603  	if a.Index != REG_NONE && a.Index != REG_TLS {
  3604  		base := int(a.Reg)
  3605  		switch a.Name {
  3606  		case obj.NAME_EXTERN,
  3607  			obj.NAME_GOTREF,
  3608  			obj.NAME_STATIC:
  3609  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3610  				goto bad
  3611  			}
  3612  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3613  				// The base register has already been set. It holds the PC
  3614  				// of this instruction returned by a PC-reading thunk.
  3615  				// See obj6.go:rewriteToPcrel.
  3616  			} else {
  3617  				base = REG_NONE
  3618  			}
  3619  			v = int32(vaddr(ctxt, p, a, &rel))
  3620  
  3621  		case obj.NAME_AUTO,
  3622  			obj.NAME_PARAM:
  3623  			base = REG_SP
  3624  		}
  3625  
  3626  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3627  		if base == REG_NONE {
  3628  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3629  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3630  			goto putrelv
  3631  		}
  3632  
  3633  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3634  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3635  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3636  			return
  3637  		}
  3638  
  3639  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3640  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3641  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3642  			ab.Put1(disp8)
  3643  			return
  3644  		}
  3645  
  3646  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3647  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3648  		goto putrelv
  3649  	}
  3650  
  3651  	base = int(a.Reg)
  3652  	switch a.Name {
  3653  	case obj.NAME_STATIC,
  3654  		obj.NAME_GOTREF,
  3655  		obj.NAME_EXTERN:
  3656  		if a.Sym == nil {
  3657  			ctxt.Diag("bad addr: %v", p)
  3658  		}
  3659  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3660  			// The base register has already been set. It holds the PC
  3661  			// of this instruction returned by a PC-reading thunk.
  3662  			// See obj6.go:rewriteToPcrel.
  3663  		} else {
  3664  			base = REG_NONE
  3665  		}
  3666  		v = int32(vaddr(ctxt, p, a, &rel))
  3667  
  3668  	case obj.NAME_AUTO,
  3669  		obj.NAME_PARAM:
  3670  		base = REG_SP
  3671  	}
  3672  
  3673  	if base == REG_TLS {
  3674  		v = int32(vaddr(ctxt, p, a, &rel))
  3675  	}
  3676  
  3677  	ab.rexflag |= regrex[base]&Rxb | rex
  3678  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3679  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3680  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3681  				ctxt.Diag("%v has offset against gotref", p)
  3682  			}
  3683  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3684  			goto putrelv
  3685  		}
  3686  
  3687  		// temporary
  3688  		ab.Put2(
  3689  			byte(0<<6|4<<0|r<<3), // sib present
  3690  			0<<6|4<<3|5<<0,       // DS:d32
  3691  		)
  3692  		goto putrelv
  3693  	}
  3694  
  3695  	if base == REG_SP || base == REG_R12 {
  3696  		if v == 0 {
  3697  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3698  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3699  			return
  3700  		}
  3701  
  3702  		if disp8, ok := toDisp8(v, p, ab); ok {
  3703  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3704  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3705  			ab.Put1(disp8)
  3706  			return
  3707  		}
  3708  
  3709  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3710  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3711  		goto putrelv
  3712  	}
  3713  
  3714  	if REG_AX <= base && base <= REG_R15 {
  3715  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
  3716  			rel = obj.Reloc{}
  3717  			rel.Type = objabi.R_TLS_LE
  3718  			rel.Siz = 4
  3719  			rel.Sym = nil
  3720  			rel.Add = int64(v)
  3721  			v = 0
  3722  		}
  3723  
  3724  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3725  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3726  			return
  3727  		}
  3728  
  3729  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3730  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3731  			return
  3732  		}
  3733  
  3734  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3735  		goto putrelv
  3736  	}
  3737  
  3738  	goto bad
  3739  
  3740  putrelv:
  3741  	if rel.Siz != 0 {
  3742  		if rel.Siz != 4 {
  3743  			ctxt.Diag("bad rel")
  3744  			goto bad
  3745  		}
  3746  
  3747  		r := obj.Addrel(cursym)
  3748  		*r = rel
  3749  		r.Off = int32(p.Pc + int64(ab.Len()))
  3750  	}
  3751  
  3752  	ab.PutInt32(v)
  3753  	return
  3754  
  3755  bad:
  3756  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3757  }
  3758  
  3759  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3760  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3761  }
  3762  
  3763  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3764  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3765  }
  3766  
  3767  func bytereg(a *obj.Addr, t *uint8) {
  3768  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3769  		a.Reg += REG_AL - REG_AX
  3770  		*t = 0
  3771  	}
  3772  }
  3773  
  3774  func unbytereg(a *obj.Addr, t *uint8) {
  3775  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3776  		a.Reg += REG_AX - REG_AL
  3777  		*t = 0
  3778  	}
  3779  }
  3780  
// Encoding kinds used in the fifth column of ymovtab entries.
//
// NOTE(review): the code that interprets these kinds is not visible from
// here; the notes below only record which ymovtab entries use each kind.
const (
	movLit uint8 = iota // Like Zlit
	// movRegMem and movMemReg are used by the segment-register moves
	// (opcode bytes 0x8c and 0x8e respectively).
	movRegMem
	movMemReg
	// movRegMem2op and movMemReg2op are used by entries whose opcode starts
	// with 0x0f followed by a second opcode byte (control, debug, test,
	// descriptor-table, LDT, MSW and task-register moves).
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	// movDoubleShift is used by the "double shift" SHL/SHR entries.
	movDoubleShift
	// movTLSReg is used by the "load TLS base" entries.
	movTLSReg
)
  3791  
// ymovtab lists special instruction forms: segment-register push/pop, moves
// to and from segment, control, debug and test registers, descriptor-table,
// LDT, MSW and task-register loads/stores, double shifts, and the TLS base
// load.
//
// Each entry holds the instruction, three operand classes (Y*; judging by
// the entries these are the source, middle and destination operands), one of
// the mov* encoding kinds, and up to four prefix/opcode bytes.
//
// NOTE(review): the movtab field names and the way the bytes are interpreted
// for each kind are defined outside this view — confirm there. The final
// all-zero entry looks like a sentinel that terminates the table.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3935  
  3936  func isax(a *obj.Addr) bool {
  3937  	switch a.Reg {
  3938  	case REG_AX, REG_AL, REG_AH:
  3939  		return true
  3940  	}
  3941  
  3942  	if a.Index == REG_AX {
  3943  		return true
  3944  	}
  3945  	return false
  3946  }
  3947  
  3948  func subreg(p *obj.Prog, from int, to int) {
  3949  	if false { /* debug['Q'] */
  3950  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3951  	}
  3952  
  3953  	if int(p.From.Reg) == from {
  3954  		p.From.Reg = int16(to)
  3955  		p.Ft = 0
  3956  	}
  3957  
  3958  	if int(p.To.Reg) == from {
  3959  		p.To.Reg = int16(to)
  3960  		p.Tt = 0
  3961  	}
  3962  
  3963  	if int(p.From.Index) == from {
  3964  		p.From.Index = int16(to)
  3965  		p.Ft = 0
  3966  	}
  3967  
  3968  	if int(p.To.Index) == from {
  3969  		p.To.Index = int16(to)
  3970  		p.Tt = 0
  3971  	}
  3972  
  3973  	if false { /* debug['Q'] */
  3974  		fmt.Printf("%v\n", p)
  3975  	}
  3976  }
  3977  
  3978  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3979  	switch op {
  3980  	case Pm, Pe, Pf2, Pf3:
  3981  		if osize != 1 {
  3982  			if op != Pm {
  3983  				ab.Put1(byte(op))
  3984  			}
  3985  			ab.Put1(Pm)
  3986  			z++
  3987  			op = int(o.op[z])
  3988  			break
  3989  		}
  3990  		fallthrough
  3991  
  3992  	default:
  3993  		if ab.Len() == 0 || ab.Last() != Pm {
  3994  			ab.Put1(Pm)
  3995  		}
  3996  	}
  3997  
  3998  	ab.Put1(byte(op))
  3999  	return z
  4000  }
  4001  
// bpduff1 holds the raw machine code for the two instructions named in the
// line comments: store BP at -16(SP), then point BP at that slot.
// NOTE(review): the name suggests it is emitted ahead of a Duff's-device
// call; the use site is outside this view — confirm.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  4006  
// bpduff2 holds the raw machine code for the instruction named in the line
// comment: reload BP from the word it currently points at.
// NOTE(review): presumably the counterpart that undoes bpduff1; the use
// site is outside this view — confirm.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  4010  
  4011  // asmevex emits EVEX pregis and opcode byte.
  4012  // In addition to asmvex r/m, vvvv and reg fields also requires optional
  4013  // K-masking register.
  4014  //
  4015  // Expects asmbuf.evex to be properly initialized.
  4016  func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
  4017  	ab.evexflag = true
  4018  	evex := ab.evex
  4019  
  4020  	rexR := byte(1)
  4021  	evexR := byte(1)
  4022  	rexX := byte(1)
  4023  	rexB := byte(1)
  4024  	if r != nil {
  4025  		if regrex[r.Reg]&Rxr != 0 {
  4026  			rexR = 0 // "ModR/M.reg" selector 4th bit.
  4027  		}
  4028  		if regrex[r.Reg]&RxrEvex != 0 {
  4029  			evexR = 0 // "ModR/M.reg" selector 5th bit.
  4030  		}
  4031  	}
  4032  	if rm != nil {
  4033  		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
  4034  			rexX = 0
  4035  		} else if regrex[rm.Index]&Rxx != 0 {
  4036  			rexX = 0
  4037  		}
  4038  		if regrex[rm.Reg]&Rxb != 0 {
  4039  			rexB = 0
  4040  		}
  4041  	}
  4042  	// P0 = [R][X][B][R'][00][mm]
  4043  	p0 := (rexR << 7) |
  4044  		(rexX << 6) |
  4045  		(rexB << 5) |
  4046  		(evexR << 4) |
  4047  		(0 << 2) |
  4048  		(evex.M() << 0)
  4049  
  4050  	vexV := byte(0)
  4051  	if v != nil {
  4052  		// 4bit-wide reg index.
  4053  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4054  	}
  4055  	vexV ^= 0x0F
  4056  	// P1 = [W][vvvv][1][pp]
  4057  	p1 := (evex.W() << 7) |
  4058  		(vexV << 3) |
  4059  		(1 << 2) |
  4060  		(evex.P() << 0)
  4061  
  4062  	suffix := evexSuffixMap[p.Scond]
  4063  	evexZ := byte(0)
  4064  	evexLL := evex.L()
  4065  	evexB := byte(0)
  4066  	evexV := byte(1)
  4067  	evexA := byte(0)
  4068  	if suffix.zeroing {
  4069  		if !evex.ZeroingEnabled() {
  4070  			ctxt.Diag("unsupported zeroing: %v", p)
  4071  		}
  4072  		evexZ = 1
  4073  	}
  4074  	switch {
  4075  	case suffix.rounding != rcUnset:
  4076  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4077  			ctxt.Diag("illegal rounding with memory argument: %v", p)
  4078  		} else if !evex.RoundingEnabled() {
  4079  			ctxt.Diag("unsupported rounding: %v", p)
  4080  		}
  4081  		evexB = 1
  4082  		evexLL = suffix.rounding
  4083  	case suffix.broadcast:
  4084  		if rm == nil || rm.Type != obj.TYPE_MEM {
  4085  			ctxt.Diag("illegal broadcast without memory argument: %v", p)
  4086  		} else if !evex.BroadcastEnabled() {
  4087  			ctxt.Diag("unsupported broadcast: %v", p)
  4088  		}
  4089  		evexB = 1
  4090  	case suffix.sae:
  4091  		if rm != nil && rm.Type == obj.TYPE_MEM {
  4092  			ctxt.Diag("illegal SAE with memory argument: %v", p)
  4093  		} else if !evex.SaeEnabled() {
  4094  			ctxt.Diag("unsupported SAE: %v", p)
  4095  		}
  4096  		evexB = 1
  4097  	}
  4098  	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
  4099  		evexV = 0
  4100  	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
  4101  		evexV = 0 // VSR selector 5th bit.
  4102  	}
  4103  	if k != nil {
  4104  		evexA = byte(reg[k.Reg])
  4105  	}
  4106  	// P2 = [z][L'L][b][V'][aaa]
  4107  	p2 := (evexZ << 7) |
  4108  		(evexLL << 5) |
  4109  		(evexB << 4) |
  4110  		(evexV << 3) |
  4111  		(evexA << 0)
  4112  
  4113  	const evexEscapeByte = 0x62
  4114  	ab.Put4(evexEscapeByte, p0, p1, p2)
  4115  	ab.Put1(evex.opcode)
  4116  }
  4117  
  4118  // Emit VEX prefix and opcode byte.
  4119  // The three addresses are the r/m, vvvv, and reg fields.
  4120  // The reg and rm arguments appear in the same order as the
  4121  // arguments to asmand, which typically follows the call to asmvex.
  4122  // The final two arguments are the VEX prefix (see encoding above)
  4123  // and the opcode byte.
  4124  // For details about vex prefix see:
  4125  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4126  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4127  	ab.vexflag = true
  4128  	rexR := 0
  4129  	if r != nil {
  4130  		rexR = regrex[r.Reg] & Rxr
  4131  	}
  4132  	rexB := 0
  4133  	rexX := 0
  4134  	if rm != nil {
  4135  		rexB = regrex[rm.Reg] & Rxb
  4136  		rexX = regrex[rm.Index] & Rxx
  4137  	}
  4138  	vexM := (vex >> 3) & 0x7
  4139  	vexWLP := vex & 0x87
  4140  	vexV := byte(0)
  4141  	if v != nil {
  4142  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4143  	}
  4144  	vexV ^= 0xF
  4145  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4146  		// Can use 2-byte encoding.
  4147  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4148  	} else {
  4149  		// Must use 3-byte encoding.
  4150  		ab.Put3(0xc4,
  4151  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4152  			vexV<<3|vexWLP,
  4153  		)
  4154  	}
  4155  	ab.Put1(opcode)
  4156  }
  4157  
  4158  // regIndex returns register index that fits in 5 bits.
  4159  //
  4160  //	R         : 3 bit | legacy instructions     | N/A
  4161  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4162  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4163  //
  4164  // Examples:
  4165  //	REG_Z30 => 30
  4166  //	REG_X15 => 15
  4167  //	REG_R9  => 9
  4168  //	REG_AX  => 0
  4169  //
  4170  func regIndex(r int16) int {
  4171  	lower3bits := reg[r]
  4172  	high4bit := regrex[r] & Rxr << 1
  4173  	high5bit := regrex[r] & RxrEvex << 0
  4174  	return lower3bits | high4bit | high5bit
  4175  }
  4176  
  4177  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4178  // Reports errors via ctxt.
  4179  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4180  	// If any pair of the index, mask, or destination registers
  4181  	// are the same, illegal instruction trap (#UD) is triggered.
  4182  	index := regIndex(p.GetFrom3().Index)
  4183  	mask := regIndex(p.From.Reg)
  4184  	dest := regIndex(p.To.Reg)
  4185  	if dest == mask || dest == index || mask == index {
  4186  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4187  		return false
  4188  	}
  4189  
  4190  	return true
  4191  }
  4192  
  4193  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4194  // Reports errors via ctxt.
  4195  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4196  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4197  	// register is the same as index vector in VSIB.
  4198  	index := regIndex(p.From.Index)
  4199  	dest := regIndex(p.To.Reg)
  4200  	if dest == index {
  4201  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4202  		return false
  4203  	}
  4204  
  4205  	return true
  4206  }
  4207  
  4208  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4209  	o := opindex[p.As&obj.AMask]
  4210  
  4211  	if o == nil {
  4212  		ctxt.Diag("asmins: missing op %v", p)
  4213  		return
  4214  	}
  4215  
  4216  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4217  		ab.Put1(byte(pre))
  4218  	}
  4219  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4220  		ab.Put1(byte(pre))
  4221  	}
  4222  
  4223  	// Checks to warn about instruction/arguments combinations that
  4224  	// will unconditionally trigger illegal instruction trap (#UD).
  4225  	switch p.As {
  4226  	case AVGATHERDPD,
  4227  		AVGATHERQPD,
  4228  		AVGATHERDPS,
  4229  		AVGATHERQPS,
  4230  		AVPGATHERDD,
  4231  		AVPGATHERQD,
  4232  		AVPGATHERDQ,
  4233  		AVPGATHERQQ:
  4234  		// AVX512 gather requires explicit K mask.
  4235  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4236  			if !avx512gatherValid(ctxt, p) {
  4237  				return
  4238  			}
  4239  		} else {
  4240  			if !avx2gatherValid(ctxt, p) {
  4241  				return
  4242  			}
  4243  		}
  4244  	}
  4245  
  4246  	if p.Ft == 0 {
  4247  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4248  	}
  4249  	if p.Tt == 0 {
  4250  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4251  	}
  4252  
  4253  	ft := int(p.Ft) * Ymax
  4254  	var f3t int
  4255  	tt := int(p.Tt) * Ymax
  4256  
  4257  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4258  	z := 0
  4259  	var a *obj.Addr
  4260  	var l int
  4261  	var op int
  4262  	var q *obj.Prog
  4263  	var r *obj.Reloc
  4264  	var rel obj.Reloc
  4265  	var v int64
  4266  
  4267  	args := make([]int, 0, argListMax)
  4268  	if ft != Ynone*Ymax {
  4269  		args = append(args, ft)
  4270  	}
  4271  	for i := range p.RestArgs {
  4272  		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  4273  	}
  4274  	if tt != Ynone*Ymax {
  4275  		args = append(args, tt)
  4276  	}
  4277  
  4278  	for _, yt := range o.ytab {
  4279  		// ytab matching is purely args-based,
  4280  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4281  		// add EVEX-only filter that will reject non-EVEX matches.
  4282  		//
  4283  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4284  		// Without this rule, operands will lead to VEX-encoded form
  4285  		// and produce "c5b15813" encoding.
  4286  		if !yt.match(args) {
  4287  			// "xo" is always zero for VEX/EVEX encoded insts.
  4288  			z += int(yt.zoffset) + xo
  4289  		} else {
  4290  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4291  				// Do not signal error and continue to search
  4292  				// for matching EVEX-encoded form.
  4293  				z += int(yt.zoffset)
  4294  				continue
  4295  			}
  4296  
  4297  			switch o.prefix {
  4298  			case Px1: // first option valid only in 32-bit mode
  4299  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4300  					z += int(yt.zoffset) + xo
  4301  					continue
  4302  				}
  4303  			case Pq: // 16 bit escape and opcode escape
  4304  				ab.Put2(Pe, Pm)
  4305  
  4306  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4307  				ab.rexflag |= Pw
  4308  				ab.Put2(Pe, Pm)
  4309  
  4310  			case Pq4: // 66 0F 38
  4311  				ab.Put3(0x66, 0x0F, 0x38)
  4312  
  4313  			case Pq4w: // 66 0F 38 + REX.W
  4314  				ab.rexflag |= Pw
  4315  				ab.Put3(0x66, 0x0F, 0x38)
  4316  
  4317  			case Pq5: // F3 0F 38
  4318  				ab.Put3(0xF3, 0x0F, 0x38)
  4319  
  4320  			case Pq5w: //  F3 0F 38 + REX.W
  4321  				ab.rexflag |= Pw
  4322  				ab.Put3(0xF3, 0x0F, 0x38)
  4323  
  4324  			case Pf2, // xmm opcode escape
  4325  				Pf3:
  4326  				ab.Put2(o.prefix, Pm)
  4327  
  4328  			case Pef3:
  4329  				ab.Put3(Pe, Pf3, Pm)
  4330  
  4331  			case Pfw: // xmm opcode escape + REX.W
  4332  				ab.rexflag |= Pw
  4333  				ab.Put2(Pf3, Pm)
  4334  
  4335  			case Pm: // opcode escape
  4336  				ab.Put1(Pm)
  4337  
  4338  			case Pe: // 16 bit escape
  4339  				ab.Put1(Pe)
  4340  
  4341  			case Pw: // 64-bit escape
  4342  				if ctxt.Arch.Family != sys.AMD64 {
  4343  					ctxt.Diag("asmins: illegal 64: %v", p)
  4344  				}
  4345  				ab.rexflag |= Pw
  4346  
  4347  			case Pw8: // 64-bit escape if z >= 8
  4348  				if z >= 8 {
  4349  					if ctxt.Arch.Family != sys.AMD64 {
  4350  						ctxt.Diag("asmins: illegal 64: %v", p)
  4351  					}
  4352  					ab.rexflag |= Pw
  4353  				}
  4354  
  4355  			case Pb: // botch
  4356  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4357  					goto bad
  4358  				}
  4359  				// NOTE(rsc): This is probably safe to do always,
  4360  				// but when enabled it chooses different encodings
  4361  				// than the old cmd/internal/obj/i386 code did,
  4362  				// which breaks our "same bits out" checks.
  4363  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4364  				// in the original obj/i386, and it would encode
  4365  				// (using a valid, shorter form) as 3c 00 if we enabled
  4366  				// the call to bytereg here.
  4367  				if ctxt.Arch.Family == sys.AMD64 {
  4368  					bytereg(&p.From, &p.Ft)
  4369  					bytereg(&p.To, &p.Tt)
  4370  				}
  4371  
  4372  			case P32: // 32 bit but illegal if 64-bit mode
  4373  				if ctxt.Arch.Family == sys.AMD64 {
  4374  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4375  				}
  4376  
  4377  			case Py: // 64-bit only, no prefix
  4378  				if ctxt.Arch.Family != sys.AMD64 {
  4379  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4380  				}
  4381  
  4382  			case Py1: // 64-bit only if z < 1, no prefix
  4383  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4384  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4385  				}
  4386  
  4387  			case Py3: // 64-bit only if z < 3, no prefix
  4388  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4389  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4390  				}
  4391  			}
  4392  
  4393  			if z >= len(o.op) {
  4394  				log.Fatalf("asmins bad table %v", p)
  4395  			}
  4396  			op = int(o.op[z])
  4397  			if op == 0x0f {
  4398  				ab.Put1(byte(op))
  4399  				z++
  4400  				op = int(o.op[z])
  4401  			}
  4402  
  4403  			switch yt.zcase {
  4404  			default:
  4405  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4406  				return
  4407  
  4408  			case Zpseudo:
  4409  				break
  4410  
  4411  			case Zlit:
  4412  				ab.PutOpBytesLit(z, &o.op)
  4413  
  4414  			case Zlitr_m:
  4415  				ab.PutOpBytesLit(z, &o.op)
  4416  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4417  
  4418  			case Zlitm_r:
  4419  				ab.PutOpBytesLit(z, &o.op)
  4420  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4421  
  4422  			case Zlit_m_r:
  4423  				ab.PutOpBytesLit(z, &o.op)
  4424  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4425  
  4426  			case Zmb_r:
  4427  				bytereg(&p.From, &p.Ft)
  4428  				fallthrough
  4429  
  4430  			case Zm_r:
  4431  				ab.Put1(byte(op))
  4432  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4433  
  4434  			case Z_m_r:
  4435  				ab.Put1(byte(op))
  4436  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4437  
  4438  			case Zm2_r:
  4439  				ab.Put2(byte(op), o.op[z+1])
  4440  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4441  
  4442  			case Zm_r_xm:
  4443  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4444  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4445  
  4446  			case Zm_r_xm_nr:
  4447  				ab.rexflag = 0
  4448  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4449  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4450  
  4451  			case Zm_r_i_xm:
  4452  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4453  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4454  				ab.Put1(byte(p.To.Offset))
  4455  
  4456  			case Zibm_r, Zibr_m:
  4457  				ab.PutOpBytesLit(z, &o.op)
  4458  				if yt.zcase == Zibr_m {
  4459  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4460  				} else {
  4461  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4462  				}
  4463  				switch {
  4464  				default:
  4465  					ab.Put1(byte(p.From.Offset))
  4466  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4467  					ab.PutInt16(int16(p.From.Offset))
  4468  				case yt.args[0] == Yi32:
  4469  					ab.PutInt32(int32(p.From.Offset))
  4470  				}
  4471  
  4472  			case Zaut_r:
  4473  				ab.Put1(0x8d) // leal
  4474  				if p.From.Type != obj.TYPE_ADDR {
  4475  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4476  				}
  4477  				p.From.Type = obj.TYPE_MEM
  4478  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4479  				p.From.Type = obj.TYPE_ADDR
  4480  
  4481  			case Zm_o:
  4482  				ab.Put1(byte(op))
  4483  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4484  
  4485  			case Zr_m:
  4486  				ab.Put1(byte(op))
  4487  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4488  
  4489  			case Zvex:
  4490  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4491  
  4492  			case Zvex_rm_v_r:
  4493  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4494  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4495  
  4496  			case Zvex_rm_v_ro:
  4497  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4498  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4499  
  4500  			case Zvex_i_rm_vo:
  4501  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4502  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4503  				ab.Put1(byte(p.From.Offset))
  4504  
  4505  			case Zvex_i_r_v:
  4506  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4507  				regnum := byte(0x7)
  4508  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4509  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4510  				} else {
  4511  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4512  				}
  4513  				ab.Put1(o.op[z+2] | regnum)
  4514  				ab.Put1(byte(p.From.Offset))
  4515  
  4516  			case Zvex_i_rm_v_r:
  4517  				imm, from, from3, to := unpackOps4(p)
  4518  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4519  				ab.asmand(ctxt, cursym, p, from, to)
  4520  				ab.Put1(byte(imm.Offset))
  4521  
  4522  			case Zvex_i_rm_r:
  4523  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4524  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4525  				ab.Put1(byte(p.From.Offset))
  4526  
  4527  			case Zvex_v_rm_r:
  4528  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4529  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4530  
  4531  			case Zvex_r_v_rm:
  4532  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4533  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4534  
  4535  			case Zvex_rm_r_vo:
  4536  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4537  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4538  
  4539  			case Zvex_i_r_rm:
  4540  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4541  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4542  				ab.Put1(byte(p.From.Offset))
  4543  
  4544  			case Zvex_hr_rm_v_r:
  4545  				hr, from, from3, to := unpackOps4(p)
  4546  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4547  				ab.asmand(ctxt, cursym, p, from, to)
  4548  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4549  
  4550  			case Zevex_k_rmo:
  4551  				ab.evex = newEVEXBits(z, &o.op)
  4552  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4553  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4554  
  4555  			case Zevex_i_rm_vo:
  4556  				ab.evex = newEVEXBits(z, &o.op)
  4557  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4558  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4559  				ab.Put1(byte(p.From.Offset))
  4560  
  4561  			case Zevex_i_rm_k_vo:
  4562  				imm, from, kmask, to := unpackOps4(p)
  4563  				ab.evex = newEVEXBits(z, &o.op)
  4564  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4565  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4566  				ab.Put1(byte(imm.Offset))
  4567  
  4568  			case Zevex_i_r_rm:
  4569  				ab.evex = newEVEXBits(z, &o.op)
  4570  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4571  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4572  				ab.Put1(byte(p.From.Offset))
  4573  
  4574  			case Zevex_i_r_k_rm:
  4575  				imm, from, kmask, to := unpackOps4(p)
  4576  				ab.evex = newEVEXBits(z, &o.op)
  4577  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4578  				ab.asmand(ctxt, cursym, p, to, from)
  4579  				ab.Put1(byte(imm.Offset))
  4580  
  4581  			case Zevex_i_rm_r:
  4582  				ab.evex = newEVEXBits(z, &o.op)
  4583  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4584  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4585  				ab.Put1(byte(p.From.Offset))
  4586  
  4587  			case Zevex_i_rm_k_r:
  4588  				imm, from, kmask, to := unpackOps4(p)
  4589  				ab.evex = newEVEXBits(z, &o.op)
  4590  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4591  				ab.asmand(ctxt, cursym, p, from, to)
  4592  				ab.Put1(byte(imm.Offset))
  4593  
  4594  			case Zevex_i_rm_v_r:
  4595  				imm, from, from3, to := unpackOps4(p)
  4596  				ab.evex = newEVEXBits(z, &o.op)
  4597  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4598  				ab.asmand(ctxt, cursym, p, from, to)
  4599  				ab.Put1(byte(imm.Offset))
  4600  
  4601  			case Zevex_i_rm_v_k_r:
  4602  				imm, from, from3, kmask, to := unpackOps5(p)
  4603  				ab.evex = newEVEXBits(z, &o.op)
  4604  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4605  				ab.asmand(ctxt, cursym, p, from, to)
  4606  				ab.Put1(byte(imm.Offset))
  4607  
  4608  			case Zevex_r_v_rm:
  4609  				ab.evex = newEVEXBits(z, &o.op)
  4610  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4611  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4612  
  4613  			case Zevex_rm_v_r:
  4614  				ab.evex = newEVEXBits(z, &o.op)
  4615  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4616  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4617  
  4618  			case Zevex_rm_k_r:
  4619  				ab.evex = newEVEXBits(z, &o.op)
  4620  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4621  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4622  
  4623  			case Zevex_r_k_rm:
  4624  				ab.evex = newEVEXBits(z, &o.op)
  4625  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4626  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4627  
  4628  			case Zevex_rm_v_k_r:
  4629  				from, from3, kmask, to := unpackOps4(p)
  4630  				ab.evex = newEVEXBits(z, &o.op)
  4631  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4632  				ab.asmand(ctxt, cursym, p, from, to)
  4633  
  4634  			case Zevex_r_v_k_rm:
  4635  				from, from3, kmask, to := unpackOps4(p)
  4636  				ab.evex = newEVEXBits(z, &o.op)
  4637  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4638  				ab.asmand(ctxt, cursym, p, to, from)
  4639  
  4640  			case Zr_m_xm:
  4641  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4642  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4643  
  4644  			case Zr_m_xm_nr:
  4645  				ab.rexflag = 0
  4646  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4647  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4648  
  4649  			case Zo_m:
  4650  				ab.Put1(byte(op))
  4651  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4652  
  4653  			case Zcallindreg:
  4654  				r = obj.Addrel(cursym)
  4655  				r.Off = int32(p.Pc)
  4656  				r.Type = objabi.R_CALLIND
  4657  				r.Siz = 0
  4658  				fallthrough
  4659  
  4660  			case Zo_m64:
  4661  				ab.Put1(byte(op))
  4662  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4663  
  4664  			case Zm_ibo:
  4665  				ab.Put1(byte(op))
  4666  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4667  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4668  
  4669  			case Zibo_m:
  4670  				ab.Put1(byte(op))
  4671  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4672  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4673  
  4674  			case Zibo_m_xm:
  4675  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4676  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4677  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4678  
  4679  			case Z_ib, Zib_:
  4680  				if yt.zcase == Zib_ {
  4681  					a = &p.From
  4682  				} else {
  4683  					a = &p.To
  4684  				}
  4685  				ab.Put1(byte(op))
  4686  				if p.As == AXABORT {
  4687  					ab.Put1(o.op[z+1])
  4688  				}
  4689  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4690  
  4691  			case Zib_rp:
  4692  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4693  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4694  
  4695  			case Zil_rp:
  4696  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4697  				ab.Put1(byte(op + reg[p.To.Reg]))
  4698  				if o.prefix == Pe {
  4699  					v = vaddr(ctxt, p, &p.From, nil)
  4700  					ab.PutInt16(int16(v))
  4701  				} else {
  4702  					ab.relput4(ctxt, cursym, p, &p.From)
  4703  				}
  4704  
  4705  			case Zo_iw:
  4706  				ab.Put1(byte(op))
  4707  				if p.From.Type != obj.TYPE_NONE {
  4708  					v = vaddr(ctxt, p, &p.From, nil)
  4709  					ab.PutInt16(int16(v))
  4710  				}
  4711  
  4712  			case Ziq_rp:
  4713  				v = vaddr(ctxt, p, &p.From, &rel)
  4714  				l = int(v >> 32)
  4715  				if l == 0 && rel.Siz != 8 {
  4716  					ab.rexflag &^= (0x40 | Rxw)
  4717  
  4718  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4719  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4720  					if rel.Type != 0 {
  4721  						r = obj.Addrel(cursym)
  4722  						*r = rel
  4723  						r.Off = int32(p.Pc + int64(ab.Len()))
  4724  					}
  4725  
  4726  					ab.PutInt32(int32(v))
  4727  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4728  					ab.Put1(0xc7)
  4729  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4730  
  4731  					ab.PutInt32(int32(v)) // need all 8
  4732  				} else {
  4733  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4734  					ab.Put1(byte(op + reg[p.To.Reg]))
  4735  					if rel.Type != 0 {
  4736  						r = obj.Addrel(cursym)
  4737  						*r = rel
  4738  						r.Off = int32(p.Pc + int64(ab.Len()))
  4739  					}
  4740  
  4741  					ab.PutInt64(v)
  4742  				}
  4743  
  4744  			case Zib_rr:
  4745  				ab.Put1(byte(op))
  4746  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4747  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4748  
  4749  			case Z_il, Zil_:
  4750  				if yt.zcase == Zil_ {
  4751  					a = &p.From
  4752  				} else {
  4753  					a = &p.To
  4754  				}
  4755  				ab.Put1(byte(op))
  4756  				if o.prefix == Pe {
  4757  					v = vaddr(ctxt, p, a, nil)
  4758  					ab.PutInt16(int16(v))
  4759  				} else {
  4760  					ab.relput4(ctxt, cursym, p, a)
  4761  				}
  4762  
  4763  			case Zm_ilo, Zilo_m:
  4764  				ab.Put1(byte(op))
  4765  				if yt.zcase == Zilo_m {
  4766  					a = &p.From
  4767  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4768  				} else {
  4769  					a = &p.To
  4770  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4771  				}
  4772  
  4773  				if o.prefix == Pe {
  4774  					v = vaddr(ctxt, p, a, nil)
  4775  					ab.PutInt16(int16(v))
  4776  				} else {
  4777  					ab.relput4(ctxt, cursym, p, a)
  4778  				}
  4779  
  4780  			case Zil_rr:
  4781  				ab.Put1(byte(op))
  4782  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4783  				if o.prefix == Pe {
  4784  					v = vaddr(ctxt, p, &p.From, nil)
  4785  					ab.PutInt16(int16(v))
  4786  				} else {
  4787  					ab.relput4(ctxt, cursym, p, &p.From)
  4788  				}
  4789  
  4790  			case Z_rp:
  4791  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4792  				ab.Put1(byte(op + reg[p.To.Reg]))
  4793  
  4794  			case Zrp_:
  4795  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4796  				ab.Put1(byte(op + reg[p.From.Reg]))
  4797  
  4798  			case Zcallcon, Zjmpcon:
  4799  				if yt.zcase == Zcallcon {
  4800  					ab.Put1(byte(op))
  4801  				} else {
  4802  					ab.Put1(o.op[z+1])
  4803  				}
  4804  				r = obj.Addrel(cursym)
  4805  				r.Off = int32(p.Pc + int64(ab.Len()))
  4806  				r.Type = objabi.R_PCREL
  4807  				r.Siz = 4
  4808  				r.Add = p.To.Offset
  4809  				ab.PutInt32(0)
  4810  
  4811  			case Zcallind:
  4812  				ab.Put2(byte(op), o.op[z+1])
  4813  				r = obj.Addrel(cursym)
  4814  				r.Off = int32(p.Pc + int64(ab.Len()))
  4815  				if ctxt.Arch.Family == sys.AMD64 {
  4816  					r.Type = objabi.R_PCREL
  4817  				} else {
  4818  					r.Type = objabi.R_ADDR
  4819  				}
  4820  				r.Siz = 4
  4821  				r.Add = p.To.Offset
  4822  				r.Sym = p.To.Sym
  4823  				ab.PutInt32(0)
  4824  
  4825  			case Zcall, Zcallduff:
  4826  				if p.To.Sym == nil {
  4827  					ctxt.Diag("call without target")
  4828  					ctxt.DiagFlush()
  4829  					log.Fatalf("bad code")
  4830  				}
  4831  
  4832  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4833  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4834  				}
  4835  
  4836  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4837  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4838  					// (the call jumps into the middle of the function).
  4839  					// This makes it possible to see call sites for duffcopy/duffzero in
  4840  					// BP-based profiling tools like Linux perf (which is the
  4841  					// whole point of maintaining frame pointers in Go).
  4842  					// MOVQ BP, -16(SP)
  4843  					// LEAQ -16(SP), BP
  4844  					ab.Put(bpduff1)
  4845  				}
  4846  				ab.Put1(byte(op))
  4847  				r = obj.Addrel(cursym)
  4848  				r.Off = int32(p.Pc + int64(ab.Len()))
  4849  				r.Sym = p.To.Sym
  4850  				r.Add = p.To.Offset
  4851  				r.Type = objabi.R_CALL
  4852  				r.Siz = 4
  4853  				ab.PutInt32(0)
  4854  
  4855  				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4856  					// Pop BP pushed above.
  4857  					// MOVQ 0(BP), BP
  4858  					ab.Put(bpduff2)
  4859  				}
  4860  
  4861  			// TODO: jump across functions needs reloc
  4862  			case Zbr, Zjmp, Zloop:
  4863  				if p.As == AXBEGIN {
  4864  					ab.Put1(byte(op))
  4865  				}
  4866  				if p.To.Sym != nil {
  4867  					if yt.zcase != Zjmp {
  4868  						ctxt.Diag("branch to ATEXT")
  4869  						ctxt.DiagFlush()
  4870  						log.Fatalf("bad code")
  4871  					}
  4872  
  4873  					ab.Put1(o.op[z+1])
  4874  					r = obj.Addrel(cursym)
  4875  					r.Off = int32(p.Pc + int64(ab.Len()))
  4876  					r.Sym = p.To.Sym
  4877  					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4878  					// it can point to a trampoline instead of the destination itself.
  4879  					r.Type = objabi.R_CALL
  4880  					r.Siz = 4
  4881  					ab.PutInt32(0)
  4882  					break
  4883  				}
  4884  
  4885  				// Assumes q is in this function.
  4886  				// TODO: Check in input, preserve in brchain.
  4887  
  4888  				// Fill in backward jump now.
  4889  				q = p.To.Target()
  4890  
  4891  				if q == nil {
  4892  					ctxt.Diag("jmp/branch/loop without target")
  4893  					ctxt.DiagFlush()
  4894  					log.Fatalf("bad code")
  4895  				}
  4896  
  4897  				if p.Back&branchBackwards != 0 {
  4898  					v = q.Pc - (p.Pc + 2)
  4899  					if v >= -128 && p.As != AXBEGIN {
  4900  						if p.As == AJCXZL {
  4901  							ab.Put1(0x67)
  4902  						}
  4903  						ab.Put2(byte(op), byte(v))
  4904  					} else if yt.zcase == Zloop {
  4905  						ctxt.Diag("loop too far: %v", p)
  4906  					} else {
  4907  						v -= 5 - 2
  4908  						if p.As == AXBEGIN {
  4909  							v--
  4910  						}
  4911  						if yt.zcase == Zbr {
  4912  							ab.Put1(0x0f)
  4913  							v--
  4914  						}
  4915  
  4916  						ab.Put1(o.op[z+1])
  4917  						ab.PutInt32(int32(v))
  4918  					}
  4919  
  4920  					break
  4921  				}
  4922  
  4923  				// Annotate target; will fill in later.
  4924  				p.Forwd = q.Rel
  4925  
  4926  				q.Rel = p
  4927  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4928  					if p.As == AJCXZL {
  4929  						ab.Put1(0x67)
  4930  					}
  4931  					ab.Put2(byte(op), 0)
  4932  				} else if yt.zcase == Zloop {
  4933  					ctxt.Diag("loop too far: %v", p)
  4934  				} else {
  4935  					if yt.zcase == Zbr {
  4936  						ab.Put1(0x0f)
  4937  					}
  4938  					ab.Put1(o.op[z+1])
  4939  					ab.PutInt32(0)
  4940  				}
  4941  
  4942  			case Zbyte:
  4943  				v = vaddr(ctxt, p, &p.From, &rel)
  4944  				if rel.Siz != 0 {
  4945  					rel.Siz = uint8(op)
  4946  					r = obj.Addrel(cursym)
  4947  					*r = rel
  4948  					r.Off = int32(p.Pc + int64(ab.Len()))
  4949  				}
  4950  
  4951  				ab.Put1(byte(v))
  4952  				if op > 1 {
  4953  					ab.Put1(byte(v >> 8))
  4954  					if op > 2 {
  4955  						ab.PutInt16(int16(v >> 16))
  4956  						if op > 4 {
  4957  							ab.PutInt32(int32(v >> 32))
  4958  						}
  4959  					}
  4960  				}
  4961  			}
  4962  
  4963  			return
  4964  		}
  4965  	}
  4966  	f3t = Ynone * Ymax
  4967  	if p.GetFrom3() != nil {
  4968  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4969  	}
  4970  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4971  		var pp obj.Prog
  4972  		var t []byte
  4973  		if p.As == mo[0].as {
  4974  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4975  				t = mo[0].op[:]
  4976  				switch mo[0].code {
  4977  				default:
  4978  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4979  
  4980  				case movLit:
  4981  					for z = 0; t[z] != 0; z++ {
  4982  						ab.Put1(t[z])
  4983  					}
  4984  
  4985  				case movRegMem:
  4986  					ab.Put1(t[0])
  4987  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4988  
  4989  				case movMemReg:
  4990  					ab.Put1(t[0])
  4991  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4992  
  4993  				case movRegMem2op: // r,m - 2op
  4994  					ab.Put2(t[0], t[1])
  4995  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4996  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4997  
  4998  				case movMemReg2op:
  4999  					ab.Put2(t[0], t[1])
  5000  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5001  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5002  
  5003  				case movFullPtr:
  5004  					if t[0] != 0 {
  5005  						ab.Put1(t[0])
  5006  					}
  5007  					switch p.To.Index {
  5008  					default:
  5009  						goto bad
  5010  
  5011  					case REG_DS:
  5012  						ab.Put1(0xc5)
  5013  
  5014  					case REG_SS:
  5015  						ab.Put2(0x0f, 0xb2)
  5016  
  5017  					case REG_ES:
  5018  						ab.Put1(0xc4)
  5019  
  5020  					case REG_FS:
  5021  						ab.Put2(0x0f, 0xb4)
  5022  
  5023  					case REG_GS:
  5024  						ab.Put2(0x0f, 0xb5)
  5025  					}
  5026  
  5027  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5028  
  5029  				case movDoubleShift:
  5030  					if t[0] == Pw {
  5031  						if ctxt.Arch.Family != sys.AMD64 {
  5032  							ctxt.Diag("asmins: illegal 64: %v", p)
  5033  						}
  5034  						ab.rexflag |= Pw
  5035  						t = t[1:]
  5036  					} else if t[0] == Pe {
  5037  						ab.Put1(Pe)
  5038  						t = t[1:]
  5039  					}
  5040  
  5041  					switch p.From.Type {
  5042  					default:
  5043  						goto bad
  5044  
  5045  					case obj.TYPE_CONST:
  5046  						ab.Put2(0x0f, t[0])
  5047  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5048  						ab.Put1(byte(p.From.Offset))
  5049  
  5050  					case obj.TYPE_REG:
  5051  						switch p.From.Reg {
  5052  						default:
  5053  							goto bad
  5054  
  5055  						case REG_CL, REG_CX:
  5056  							ab.Put2(0x0f, t[1])
  5057  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5058  						}
  5059  					}
  5060  
  5061  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5062  				// where you load the TLS base register into a register and then index off that
  5063  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5064  				// are handled in prefixof above and should not be listed here.
  5065  				case movTLSReg:
  5066  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5067  						ctxt.Diag("invalid load of TLS: %v", p)
  5068  					}
  5069  
  5070  					if ctxt.Arch.Family == sys.I386 {
  5071  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5072  						// where you load the TLS base register into a register and then index off that
  5073  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5074  						// are handled in prefixof above and should not be listed here.
  5075  						switch ctxt.Headtype {
  5076  						default:
  5077  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5078  
  5079  						case objabi.Hlinux, objabi.Hfreebsd:
  5080  							if ctxt.Flag_shared {
  5081  								// Note that this is not generating the same insns as the other cases.
  5082  								//     MOV TLS, dst
  5083  								// becomes
  5084  								//     call __x86.get_pc_thunk.dst
  5085  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5086  								// which is encoded as
  5087  								//     call __x86.get_pc_thunk.dst
  5088  								//     movq 0(dst), dst
  5089  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5090  								// is g, which we can't check here, but will when we assemble the second
  5091  								// instruction.
  5092  								dst := p.To.Reg
  5093  								ab.Put1(0xe8)
  5094  								r = obj.Addrel(cursym)
  5095  								r.Off = int32(p.Pc + int64(ab.Len()))
  5096  								r.Type = objabi.R_CALL
  5097  								r.Siz = 4
  5098  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  5099  								ab.PutInt32(0)
  5100  
  5101  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5102  								r = obj.Addrel(cursym)
  5103  								r.Off = int32(p.Pc + int64(ab.Len()))
  5104  								r.Type = objabi.R_TLS_IE
  5105  								r.Siz = 4
  5106  								r.Add = 2
  5107  								ab.PutInt32(0)
  5108  							} else {
  5109  								// ELF TLS base is 0(GS).
  5110  								pp.From = p.From
  5111  
  5112  								pp.From.Type = obj.TYPE_MEM
  5113  								pp.From.Reg = REG_GS
  5114  								pp.From.Offset = 0
  5115  								pp.From.Index = REG_NONE
  5116  								pp.From.Scale = 0
  5117  								ab.Put2(0x65, // GS
  5118  									0x8B)
  5119  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5120  							}
  5121  						case objabi.Hplan9:
  5122  							pp.From = obj.Addr{}
  5123  							pp.From.Type = obj.TYPE_MEM
  5124  							pp.From.Name = obj.NAME_EXTERN
  5125  							pp.From.Sym = plan9privates
  5126  							pp.From.Offset = 0
  5127  							pp.From.Index = REG_NONE
  5128  							ab.Put1(0x8B)
  5129  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5130  
  5131  						case objabi.Hwindows:
  5132  							// Windows TLS base is always 0x14(FS).
  5133  							pp.From = p.From
  5134  
  5135  							pp.From.Type = obj.TYPE_MEM
  5136  							pp.From.Reg = REG_FS
  5137  							pp.From.Offset = 0x14
  5138  							pp.From.Index = REG_NONE
  5139  							pp.From.Scale = 0
  5140  							ab.Put2(0x64, // FS
  5141  								0x8B)
  5142  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5143  						}
  5144  						break
  5145  					}
  5146  
  5147  					switch ctxt.Headtype {
  5148  					default:
  5149  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5150  
  5151  					case objabi.Hlinux, objabi.Hfreebsd:
  5152  						if !ctxt.Flag_shared {
  5153  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5154  						}
  5155  						// Note that this is not generating the same insn as the other cases.
  5156  						//     MOV TLS, R_to
  5157  						// becomes
  5158  						//     movq g@gottpoff(%rip), R_to
  5159  						// which is encoded as
  5160  						//     movq 0(%rip), R_to
  5161  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5162  						// is g, which we can't check here, but will when we assemble the second
  5163  						// instruction.
  5164  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5165  
  5166  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5167  						r = obj.Addrel(cursym)
  5168  						r.Off = int32(p.Pc + int64(ab.Len()))
  5169  						r.Type = objabi.R_TLS_IE
  5170  						r.Siz = 4
  5171  						r.Add = -4
  5172  						ab.PutInt32(0)
  5173  
  5174  					case objabi.Hplan9:
  5175  						pp.From = obj.Addr{}
  5176  						pp.From.Type = obj.TYPE_MEM
  5177  						pp.From.Name = obj.NAME_EXTERN
  5178  						pp.From.Sym = plan9privates
  5179  						pp.From.Offset = 0
  5180  						pp.From.Index = REG_NONE
  5181  						ab.rexflag |= Pw
  5182  						ab.Put1(0x8B)
  5183  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5184  
  5185  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5186  						// TLS base is 0(FS).
  5187  						pp.From = p.From
  5188  
  5189  						pp.From.Type = obj.TYPE_MEM
  5190  						pp.From.Name = obj.NAME_NONE
  5191  						pp.From.Reg = REG_NONE
  5192  						pp.From.Offset = 0
  5193  						pp.From.Index = REG_NONE
  5194  						pp.From.Scale = 0
  5195  						ab.rexflag |= Pw
  5196  						ab.Put2(0x64, // FS
  5197  							0x8B)
  5198  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5199  
  5200  					case objabi.Hwindows:
  5201  						// Windows TLS base is always 0x28(GS).
  5202  						pp.From = p.From
  5203  
  5204  						pp.From.Type = obj.TYPE_MEM
  5205  						pp.From.Name = obj.NAME_NONE
  5206  						pp.From.Reg = REG_GS
  5207  						pp.From.Offset = 0x28
  5208  						pp.From.Index = REG_NONE
  5209  						pp.From.Scale = 0
  5210  						ab.rexflag |= Pw
  5211  						ab.Put2(0x65, // GS
  5212  							0x8B)
  5213  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5214  					}
  5215  				}
  5216  				return
  5217  			}
  5218  		}
  5219  	}
  5220  	goto bad
  5221  
  5222  bad:
  5223  	if ctxt.Arch.Family != sys.AMD64 {
  5224  		// here, the assembly has failed.
  5225  		// if it's a byte instruction that has
  5226  		// unaddressable registers, try to
  5227  		// exchange registers and reissue the
  5228  		// instruction with the operands renamed.
  5229  		pp := *p
  5230  
  5231  		unbytereg(&pp.From, &pp.Ft)
  5232  		unbytereg(&pp.To, &pp.Tt)
  5233  
  5234  		z := int(p.From.Reg)
  5235  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5236  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5237  			// For now, different to keep bit-for-bit compatibility.
  5238  			if ctxt.Arch.Family == sys.I386 {
  5239  				breg := byteswapreg(ctxt, &p.To)
  5240  				if breg != REG_AX {
  5241  					ab.Put1(0x87) // xchg lhs,bx
  5242  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5243  					subreg(&pp, z, breg)
  5244  					ab.doasm(ctxt, cursym, &pp)
  5245  					ab.Put1(0x87) // xchg lhs,bx
  5246  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5247  				} else {
  5248  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5249  					subreg(&pp, z, REG_AX)
  5250  					ab.doasm(ctxt, cursym, &pp)
  5251  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5252  				}
  5253  				return
  5254  			}
  5255  
  5256  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5257  				// We certainly don't want to exchange
  5258  				// with AX if the op is MUL or DIV.
  5259  				ab.Put1(0x87) // xchg lhs,bx
  5260  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5261  				subreg(&pp, z, REG_BX)
  5262  				ab.doasm(ctxt, cursym, &pp)
  5263  				ab.Put1(0x87) // xchg lhs,bx
  5264  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5265  			} else {
  5266  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5267  				subreg(&pp, z, REG_AX)
  5268  				ab.doasm(ctxt, cursym, &pp)
  5269  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5270  			}
  5271  			return
  5272  		}
  5273  
  5274  		z = int(p.To.Reg)
  5275  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5276  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5277  			// For now, different to keep bit-for-bit compatibility.
  5278  			if ctxt.Arch.Family == sys.I386 {
  5279  				breg := byteswapreg(ctxt, &p.From)
  5280  				if breg != REG_AX {
  5281  					ab.Put1(0x87) //xchg rhs,bx
  5282  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5283  					subreg(&pp, z, breg)
  5284  					ab.doasm(ctxt, cursym, &pp)
  5285  					ab.Put1(0x87) // xchg rhs,bx
  5286  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5287  				} else {
  5288  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5289  					subreg(&pp, z, REG_AX)
  5290  					ab.doasm(ctxt, cursym, &pp)
  5291  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5292  				}
  5293  				return
  5294  			}
  5295  
  5296  			if isax(&p.From) {
  5297  				ab.Put1(0x87) // xchg rhs,bx
  5298  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5299  				subreg(&pp, z, REG_BX)
  5300  				ab.doasm(ctxt, cursym, &pp)
  5301  				ab.Put1(0x87) // xchg rhs,bx
  5302  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5303  			} else {
  5304  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5305  				subreg(&pp, z, REG_AX)
  5306  				ab.doasm(ctxt, cursym, &pp)
  5307  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5308  			}
  5309  			return
  5310  		}
  5311  	}
  5312  
  5313  	ctxt.Diag("invalid instruction: %v", p)
  5314  }
  5315  
  5316  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5317  // which is not referenced in a.
  5318  // If a is empty, it returns BX to account for MULB-like instructions
  5319  // that might use DX and AX.
  5320  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5321  	cana, canb, canc, cand := true, true, true, true
  5322  	if a.Type == obj.TYPE_NONE {
  5323  		cana, cand = false, false
  5324  	}
  5325  
  5326  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5327  		switch a.Reg {
  5328  		case REG_NONE:
  5329  			cana, cand = false, false
  5330  		case REG_AX, REG_AL, REG_AH:
  5331  			cana = false
  5332  		case REG_BX, REG_BL, REG_BH:
  5333  			canb = false
  5334  		case REG_CX, REG_CL, REG_CH:
  5335  			canc = false
  5336  		case REG_DX, REG_DL, REG_DH:
  5337  			cand = false
  5338  		}
  5339  	}
  5340  
  5341  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5342  		switch a.Index {
  5343  		case REG_AX:
  5344  			cana = false
  5345  		case REG_BX:
  5346  			canb = false
  5347  		case REG_CX:
  5348  			canc = false
  5349  		case REG_DX:
  5350  			cand = false
  5351  		}
  5352  	}
  5353  
  5354  	switch {
  5355  	case cana:
  5356  		return REG_AX
  5357  	case canb:
  5358  		return REG_BX
  5359  	case canc:
  5360  		return REG_CX
  5361  	case cand:
  5362  		return REG_DX
  5363  	default:
  5364  		ctxt.Diag("impossible byte register")
  5365  		ctxt.DiagFlush()
  5366  		log.Fatalf("bad code")
  5367  		return 0
  5368  	}
  5369  }
  5370  
  5371  func isbadbyte(a *obj.Addr) bool {
  5372  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5373  }
  5374  
  5375  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5376  	ab.Reset()
  5377  
  5378  	ab.rexflag = 0
  5379  	ab.vexflag = false
  5380  	ab.evexflag = false
  5381  	mark := ab.Len()
  5382  	ab.doasm(ctxt, cursym, p)
  5383  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5384  		// as befits the whole approach of the architecture,
  5385  		// the rex prefix must appear before the first opcode byte
  5386  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5387  		// before the 0f opcode escape!), or it might be ignored.
  5388  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5389  		if ctxt.Arch.Family != sys.AMD64 {
  5390  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5391  		}
  5392  		n := ab.Len()
  5393  		var np int
  5394  		for np = mark; np < n; np++ {
  5395  			c := ab.At(np)
  5396  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5397  				break
  5398  			}
  5399  		}
  5400  		ab.Insert(np, byte(0x40|ab.rexflag))
  5401  	}
  5402  
  5403  	n := ab.Len()
  5404  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5405  		r := &cursym.R[i]
  5406  		if int64(r.Off) < p.Pc {
  5407  			break
  5408  		}
  5409  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5410  			r.Off++
  5411  		}
  5412  		if r.Type == objabi.R_PCREL {
  5413  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5414  				// PC-relative addressing is relative to the end of the instruction,
  5415  				// but the relocations applied by the linker are relative to the end
  5416  				// of the relocation. Because immediate instruction
  5417  				// arguments can follow the PC-relative memory reference in the
  5418  				// instruction encoding, the two may not coincide. In this case,
  5419  				// adjust addend so that linker can keep relocating relative to the
  5420  				// end of the relocation.
  5421  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5422  			} else if ctxt.Arch.Family == sys.I386 {
  5423  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5424  				// assumes that the previous instruction loaded the PC of the end
  5425  				// of that instruction into CX, so the adjustment is relative to
  5426  				// that.
  5427  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5428  			}
  5429  		}
  5430  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5431  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5432  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5433  		}
  5434  
  5435  	}
  5436  }
  5437  
  5438  // unpackOps4 extracts 4 operands from p.
  5439  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5440  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  5441  }
  5442  
  5443  // unpackOps5 extracts 5 operands from p.
  5444  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5445  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
  5446  }
  5447  

View as plain text