...

Source file src/golang.org/x/arch/x86/x86spec/cleanup.go

Documentation: golang.org/x/arch/x86/x86spec

     1  // Copyright 2016 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"sort"
    11  	"strings"
    12  )
    13  
    14  // Clean up the data from the Intel manual for correctness
    15  // and to annotate details relevant to decoding or encoding,
    16  // such as whether an instruction is valid only in certain
    17  // operand size modes.
    18  
    19  // encodeReplace maps (argument, encoding) pairs to the corrected argument.
    20  // We use a suffix 1 for the register and 2 for the r/m in the modrm byte.
    21  // We use a suffix V for a register number specified in the VEX.vvvv bits.
    22  var encodeReplace = map[[2]string]string{
    23  	{"mm", "ModRM:reg"}:        "mm1",
    24  	{"mm", "ModRM:r/m"}:        "mm2",
    25  	{"mm1", "ModRM:r/m"}:       "mm2",
    26  	{"mm2", "ModRM:reg"}:       "mm1",
    27  	{"mm/m32", "ModRM:r/m"}:    "mm2/m32",
    28  	{"mm/m64", "ModRM:r/m"}:    "mm2/m64",
    29  	{"xmm", "ModRM:reg"}:       "xmm1",
    30  	{"xmm", "ModRM:r/m"}:       "xmm2",
    31  	{"xmm/m64", "ModRM:r/m"}:   "xmm2/m64",
    32  	{"xmm0", "ModRM:reg"}:      "xmm1",
    33  	{"xmm1", "ModRM:r/m"}:      "xmm2",
    34  	{"xmm1/m16", "ModRM:r/m"}:  "xmm2/m16",
    35  	{"xmm1/m32", "ModRM:r/m"}:  "xmm2/m32",
    36  	{"xmm1/m64", "ModRM:r/m"}:  "xmm2/m64",
    37  	{"xmm1/m128", "ModRM:r/m"}: "xmm2/m128",
    38  	{"xmm1/m256", "ModRM:r/m"}: "xmm2/m256",
    39  	{"xmm/m16", "ModRM:r/m"}:   "xmm2/m16",
    40  	{"xmm/m32", "ModRM:r/m"}:   "xmm2/m32",
    41  	{"xmm/m64", "ModRM:r/m"}:   "xmm2/m64",
    42  	{"xmm/m128", "ModRM:r/m"}:  "xmm2/m128",
    43  	{"xmm/m256", "ModRM:r/m"}:  "xmm2/m256",
    44  	{"xmm3", "ModRM:reg"}:      "xmm1",
    45  	{"xmm3", "ModRM:r/m"}:      "xmm2",
    46  	{"xmm3/m16", "ModRM:r/m"}:  "xmm2/m16",
    47  	{"xmm3/m32", "ModRM:r/m"}:  "xmm2/m32",
    48  	{"xmm3/m64", "ModRM:r/m"}:  "xmm2/m64",
    49  	{"xmm3/m128", "ModRM:r/m"}: "xmm2/m128",
    50  	{"xmm3/m256", "ModRM:r/m"}: "xmm2/m256",
    51  	{"xmm2", "ModRM:reg"}:      "xmm1",
    52  	{"xmm2/m16", "ModRM:reg"}:  "xmm1/m16",
    53  	{"xmm2/m32", "ModRM:reg"}:  "xmm1/m32",
    54  	{"xmm2/m64", "ModRM:reg"}:  "xmm1/m64",
    55  	{"xmm2/m128", "ModRM:reg"}: "xmm1/m128",
    56  	{"xmm2/m256", "ModRM:reg"}: "xmm1/m256",
    57  	{"ymm", "ModRM:reg"}:       "ymm1",
    58  	{"ymm", "ModRM:r/m"}:       "ymm2",
    59  	{"ymm0", "ModRM:reg"}:      "ymm1",
    60  	{"ymm1", "ModRM:r/m"}:      "ymm2",
    61  	{"ymm1/m16", "ModRM:r/m"}:  "ymm2/m16",
    62  	{"ymm1/m32", "ModRM:r/m"}:  "ymm2/m32",
    63  	{"ymm1/m64", "ModRM:r/m"}:  "ymm2/m64",
    64  	{"ymm1/m128", "ModRM:r/m"}: "ymm2/m128",
    65  	{"ymm1/m256", "ModRM:r/m"}: "ymm2/m256",
    66  	{"ymm3", "ModRM:reg"}:      "ymm1",
    67  	{"ymm3", "ModRM:r/m"}:      "ymm2",
    68  	{"ymm3/m16", "ModRM:r/m"}:  "ymm2/m16",
    69  	{"ymm3/m32", "ModRM:r/m"}:  "ymm2/m32",
    70  	{"ymm3/m64", "ModRM:r/m"}:  "ymm2/m64",
    71  	{"ymm3/m128", "ModRM:r/m"}: "ymm2/m128",
    72  	{"ymm3/m256", "ModRM:r/m"}: "ymm2/m256",
    73  	{"ymm2", "ModRM:reg"}:      "ymm1",
    74  	{"ymm2/m16", "ModRM:reg"}:  "ymm1/m16",
    75  	{"ymm2/m32", "ModRM:reg"}:  "ymm1/m32",
    76  	{"ymm2/m64", "ModRM:reg"}:  "ymm1/m64",
    77  	{"ymm2/m128", "ModRM:reg"}: "ymm1/m128",
    78  	{"ymm2/m256", "ModRM:reg"}: "ymm1/m256",
    79  	{"xmm1", "VEX.vvvv"}:       "xmmV",
    80  	{"xmm2", "VEX.vvvv"}:       "xmmV",
    81  	{"ymm1", "VEX.vvvv"}:       "ymmV",
    82  	{"ymm2", "VEX.vvvv"}:       "ymmV",
    83  	{"xmm4", "imm8[7:4]"}:      "xmmIH",
    84  	{"ymm4", "imm8[7:4]"}:      "ymmIH",
    85  	{"r8", "opcode + rd"}:      "r8op",
    86  	{"r16", "opcode + rd"}:     "r16op",
    87  	{"r32", "opcode + rd"}:     "r32op",
    88  	{"r64", "opcode + rd"}:     "r64op",
    89  	{"reg/m32", "ModRM:r/m"}:   "r/m32",
    90  	{"reg/m16", "ModRM:r/m"}:   "r32/m16",
    91  	{"bnd", "ModRM:reg"}:       "bnd1",
    92  	{"bnd2", "ModRM:reg"}:      "bnd1",
    93  	{"bnd1/m64", "ModRM:r/m"}:  "bnd2/m64",
    94  	{"bnd1/m128", "ModRM:r/m"}: "bnd2/m128",
    95  	{"r32a", "ModRM:reg"}:      "r32",
    96  	{"r64a", "ModRM:reg"}:      "r64",
    97  	{"r32", "VEX.vvvv"}:        "r32V",
    98  	{"r64", "VEX.vvvv"}:        "r64V",
    99  	{"r32b", "VEX.vvvv"}:       "r32V",
   100  	{"r64b", "VEX.vvvv"}:       "r64V",
   101  	{"r64", "VEX.vvvv"}:        "r64V",
   102  	{"ST", "ST(0)"}:            "ST(0)",
   103  }
   104  
   105  // A few instructions do not have the usual encoding descriptions.
   106  // Supply them.
   107  var encodings = map[string][]string{
   108  	"FADD m32fp":            {"ModRM:r/m (r)"},
   109  	"FADD m64fp":            {"ModRM:r/m (r)"},
   110  	"FADD ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
   111  	"FADD ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
   112  	"FADDP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
   113  	"FIADD m32int":          {"ModRM:r/m (r)"},
   114  	"FIADD m16int":          {"ModRM:r/m (r)"},
   115  	"FBLD m80dec":           {"ModRM:r/m (r)"},
   116  	"FBSTP m80bcd":          {"ModRM:r/m (w)"},
   117  	"FCMOVB ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
   118  	"FCMOVE ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
   119  	"FCMOVBE ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
   120  	"FCMOVU ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
   121  	"FCMOVNB ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
   122  	"FCMOVNE ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
   123  	"FCMOVNBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
   124  	"FCMOVNU ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
   125  	"FCOM m32fp":            {"ModRM:r/m (r)"},
   126  	"FCOM m64fp":            {"ModRM:r/m (r)"},
   127  	"FCOM ST(i)":            {"ST(i) (r)"},
   128  	"FCOMP m32fp":           {"ModRM:r/m (r)"},
   129  	"FCOMP m64fp":           {"ModRM:r/m (r)"},
   130  	"FCOMP ST(i)":           {"ST(i) (r)"},
   131  	"FCOMI ST, ST(i)":       {"ST(0) (r)", "ST(i) (r)"},
   132  	"FCOMIP ST, ST(i)":      {"ST(0) (r)", "ST(i) (r)"},
   133  	"FUCOMI ST, ST(i)":      {"ST(0) (r)", "ST(i) (r)"},
   134  	"FUCOMIP ST, ST(i)":     {"ST(0) (r)", "ST(i) (r)"},
   135  	"FDIV m32fp":            {"ModRM:r/m (r)"},
   136  	"FDIV m64fp":            {"ModRM:r/m (r)"},
   137  	"FDIV ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
   138  	"FDIV ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
   139  	"FDIVP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
   140  	"FIDIV m16int":          {"ModRM:r/m (r)"},
   141  	"FIDIV m32int":          {"ModRM:r/m (r)"},
   142  	"FIDIV m64int":          {"ModRM:r/m (r)"},
   143  	"FDIVR m32fp":           {"ModRM:r/m (r)"},
   144  	"FDIVR m64fp":           {"ModRM:r/m (r)"},
   145  	"FDIVR ST(0), ST(i)":    {"ST(0) (r, w)", "ST(i) (r)"},
   146  	"FDIVR ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
   147  	"FDIVRP ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
   148  	"FIDIVR m16int":         {"ModRM:r/m (r)"},
   149  	"FIDIVR m32int":         {"ModRM:r/m (r)"},
   150  	"FIDIVR m64int":         {"ModRM:r/m (r)"},
   151  	"FFREE ST(i)":           {"ST(i) (w)"},
   152  	"FICOM m16int":          {"ModRM:r/m (r)"},
   153  	"FICOM m32int":          {"ModRM:r/m (r)"},
   154  	"FICOMP m16int":         {"ModRM:r/m (r)"},
   155  	"FICOMP m32int":         {"ModRM:r/m (r)"},
   156  	"FILD m16int":           {"ModRM:r/m (r)"},
   157  	"FILD m32int":           {"ModRM:r/m (r)"},
   158  	"FILD m64int":           {"ModRM:r/m (r)"},
   159  	"FIST m16int":           {"ModRM:r/m (w)"},
   160  	"FIST m32int":           {"ModRM:r/m (w)"},
   161  	"FISTP m16int":          {"ModRM:r/m (w)"},
   162  	"FISTP m32int":          {"ModRM:r/m (w)"},
   163  	"FISTP m64int":          {"ModRM:r/m (w)"},
   164  	"FISTTP m16int":         {"ModRM:r/m (w)"},
   165  	"FISTTP m32int":         {"ModRM:r/m (w)"},
   166  	"FISTTP m64int":         {"ModRM:r/m (w)"},
   167  	"FLD m32fp":             {"ModRM:r/m (r)"},
   168  	"FLD m64fp":             {"ModRM:r/m (r)"},
   169  	"FLD m80fp":             {"ModRM:r/m (r)"},
   170  	"FLD ST(i)":             {"ST(i) (r)"},
   171  	"FLDCW m2byte":          {"ModRM:r/m (r)"},
   172  	"FLDENV m14/28byte":     {"ModRM:r/m (r)"},
   173  	"FMUL m32fp":            {"ModRM:r/m (r)"},
   174  	"FMUL m64fp":            {"ModRM:r/m (r)"},
   175  	"FMUL ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
   176  	"FMUL ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
   177  	"FMULP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
   178  	"FIMUL m16int":          {"ModRM:r/m (r)"},
   179  	"FIMUL m32int":          {"ModRM:r/m (r)"},
   180  	"FRSTOR m94/108byte":    {"ModRM:r/m (r)"},
   181  	"FSAVE m94/108byte":     {"ModRM:r/m (w)"},
   182  	"FNSAVE m94/108byte":    {"ModRM:r/m (w)"},
   183  	"FST m32fp":             {"ModRM:r/m (w)"},
   184  	"FST m64fp":             {"ModRM:r/m (w)"},
   185  	"FST m80fp":             {"ModRM:r/m (w)"},
   186  	"FST ST(i)":             {"ST(i) (w)"},
   187  	"FSTP m32fp":            {"ModRM:r/m (w)"},
   188  	"FSTP m64fp":            {"ModRM:r/m (w)"},
   189  	"FSTP m80fp":            {"ModRM:r/m (w)"},
   190  	"FSTP ST(i)":            {"ST(i) (w)"},
   191  	"FSTCW m2byte":          {"ModRM:r/m (w)"},
   192  	"FNSTCW m2byte":         {"ModRM:r/m (w)"},
   193  	"FSTENV m14/28byte":     {"ModRM:r/m (w)"},
   194  	"FNSTENV m14/28byte":    {"ModRM:r/m (w)"},
   195  	"FSTSW m2byte":          {"ModRM:r/m (w)"},
   196  	"FSTSW AX":              {"AX (w)"},
   197  	"FNSTSW m2byte":         {"ModRM:r/m (w)"},
   198  	"FNSTSW AX":             {"AX (w)"},
   199  	"FSUB m32fp":            {"ModRM:r/m (r)"},
   200  	"FSUB m64fp":            {"ModRM:r/m (r)"},
   201  	"FSUB ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
   202  	"FSUB ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
   203  	"FSUBP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
   204  	"FISUB m16int":          {"ModRM:r/m (r)"},
   205  	"FISUB m32int":          {"ModRM:r/m (r)"},
   206  	"FSUBR m32fp":           {"ModRM:r/m (r)"},
   207  	"FSUBR m64fp":           {"ModRM:r/m (r)"},
   208  	"FSUBR ST(0), ST(i)":    {"ST(0) (r, w)", "ST(i) (r)"},
   209  	"FSUBR ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
   210  	"FSUBRP ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
   211  	"FISUBR m16int":         {"ModRM:r/m (r)"},
   212  	"FISUBR m32int":         {"ModRM:r/m (r)"},
   213  	"FISUBR m64int":         {"ModRM:r/m (r)"},
   214  	"FUCOM ST(i)":           {"ST(i) (r)"},
   215  	"FUCOMP ST(i)":          {"ST(i) (r)"},
   216  	"FXCH ST(i)":            {"ST(i) (r, w)"},
   217  	"POP DS":                {"DS (w)"},
   218  	"POP ES":                {"ES (w)"},
   219  	"POP FS":                {"FS (w)"},
   220  	"POP GS":                {"GS (w)"},
   221  	"POP SS":                {"SS (w)"},
   222  	"POP CS":                {"CS (w)"},
   223  	"PUSH CS":               {"CS (r)"},
   224  	"PUSH DS":               {"DS (r)"},
   225  	"PUSH ES":               {"ES (r)"},
   226  	"PUSH FS":               {"FS (r)"},
   227  	"PUSH GS":               {"GS (r)"},
   228  	"PUSH SS":               {"SS (r)"},
   229  	"INT 3":                 {"3 (r)"},
   230  
   231  	// In manual but hard to parse
   232  	"BNDLDX bnd, mib": {"ModRM:reg (w)", "ModRM:r/m (r)"},
   233  	"BNDSTX mib, bnd": {"ModRM:r/m (r)", "ModRM:reg (r)"},
   234  
   235  	// In manual but wrong
   236  	"CALL rel16":    {"Offset"},
   237  	"CALL rel32":    {"Offset"},
   238  	"IN AL, imm8":   {"AL (w)", "imm8 (r)"},
   239  	"IN AX, imm8":   {"AX (w)", "imm8 (r)"},
   240  	"IN EAX, imm8":  {"EAX (w)", "imm8 (r)"},
   241  	"IN AL, DX":     {"AL (w)", "DX (r)"},
   242  	"IN AX, DX":     {"AX (w)", "DX (r)"},
   243  	"IN EAX, DX":    {"EAX (w)", "DX (r)"},
   244  	"OUT DX, AL":    {"DX (r)", "AL (r)"},
   245  	"OUT DX, AX":    {"DX (r)", "AX (r)"},
   246  	"OUT DX, EAX":   {"DX (r)", "EAX (r)"},
   247  	"OUT imm8, AL":  {"imm8 (r)", "AL (r)"},
   248  	"OUT imm8, AX":  {"imm8 (r)", "AX (r)"},
   249  	"OUT imm8, EAX": {"imm8 (r)", "EAX (r)"},
   250  	"XCHG AX, r16":  {"AX (r, w)", "opcode + rd (r, w)"},
   251  	"XCHG EAX, r32": {"EAX (r, w)", "opcode + rd (r, w)"},
   252  	"XCHG RAX, r64": {"RAX (r, w)", "opcode + rd (r, w)"},
   253  
   254  	// Encoding not listed.
   255  	"INVEPT r32, m128":   {"ModRM:reg (r)", "ModRM:r/m (r)"},
   256  	"INVEPT r64, m128":   {"ModRM:reg (r)", "ModRM:r/m (r)"},
   257  	"INVVPID r32, m128":  {"ModRM:reg (r)", "ModRM:r/m (r)"},
   258  	"INVVPID r64, m128":  {"ModRM:reg (r)", "ModRM:r/m (r)"},
   259  	"VMREAD r/m32, r32":  {"ModRM:r/m (w)", "ModRM:reg (r)"},
   260  	"VMREAD r/m64, r64":  {"ModRM:r/m (w)", "ModRM:reg (r)"},
   261  	"VMWRITE r32, r/m32": {"ModRM:reg (r)", "ModRM:r/m (r)"},
   262  	"VMWRITE r64, r/m64": {"ModRM:reg (r)", "ModRM:r/m (r)"},
   263  	"VMCLEAR m64":        {"ModRM:r/m (w)"},
   264  	"VMPTRLD m64":        {"ModRM:r/m (r)"},
   265  	"VMPTRST m64":        {"ModRM:r/m (w)"},
   266  	"VMXON m64":          {"ModRM:r/m (r)"},
   267  }
   268  
   269  // opAction lists the read/write actions for individual opcodes,
   270  // where the manual does not.
   271  var opAction = map[string][]string{
   272  	"ADC":         {"rw", "r"},
   273  	"ADD":         {"rw", "r"},
   274  	"AND":         {"rw", "r"},
   275  	"BLENDVPD":    {"rw", "r", "r"},
   276  	"BLENDVPS":    {"rw", "r", "r"},
   277  	"IN":          {"w", "r"},
   278  	"MOV":         {"w", "r"},
   279  	"OR":          {"rw", "r"},
   280  	"OUT":         {"r", "r"},
   281  	"PBLENDVB":    {"rw", "r", "r"},
   282  	"RCL":         {"rw", "r"},
   283  	"RCR":         {"rw", "r"},
   284  	"ROL":         {"rw", "r"},
   285  	"ROR":         {"rw", "r"},
   286  	"SAL":         {"rw", "r"},
   287  	"SAR":         {"rw", "r"},
   288  	"SBB":         {"rw", "r"},
   289  	"SHL":         {"rw", "r"},
   290  	"SHLD":        {"rw", "r", "r"},
   291  	"SHR":         {"rw", "r"},
   292  	"SHRD":        {"rw", "r", "r"},
   293  	"SUB":         {"rw", "r", "r"},
   294  	"TEST":        {"r", "r"},
   295  	"VBLENDVPD":   {"rw", "r", "r"},
   296  	"VBLENDVPS":   {"rw", "r", "r"},
   297  	"VPBLENDVB":   {"rw", "r", "r"},
   298  	"VPMASKMOVD":  {"w", "r", "r"},
   299  	"VPMASKMOVQ":  {"w", "r", "r"},
   300  	"VPSLLVD":     {"w", "r", "r"},
   301  	"VPSRAVD":     {"w", "r", "r"},
   302  	"VPSRLVD":     {"w", "r", "r"},
   303  	"VPSRLVQ":     {"w", "r", "r"},
   304  	"VINSERTI128": {"w", "r", "r"},
   305  	"VPBLENDD":    {"w", "r", "r"},
   306  	"VPERMD":      {"w", "r", "r"},
   307  	"VPERMPS":     {"w", "r", "r"},
   308  	"VPERM2I128":  {"w", "r", "r"},
   309  	"VPSLLVQ":     {"w", "r", "r"},
   310  	"XCHG":        {"rw", "rw"},
   311  	"XOR":         {"rw", "r"},
   312  }
   313  
   314  // encodeOK lists valid arg, encoding pairs.
   315  // Any pair not listed gets a warning.
   316  var encodeOK = map[[2]string]bool{
   317  	{"0", "imm8"}:                true,
   318  	{"1", "1"}:                   true,
   319  	{"1", "imm8"}:                true,
   320  	{"<XMM0>", "<XMM0>"}:         true,
   321  	{"<XMM0>", "implicit XMM0"}:  true,
   322  	{"AL", "AL"}:                 true,
   323  	{"AL", "AL/AX/EAX/RAX"}:      true,
   324  	{"AX", "AL/AX/EAX/RAX"}:      true,
   325  	{"AX", "AX"}:                 true,
   326  	{"AX", "AX/EAX/RAX"}:         true,
   327  	{"CL", "CL"}:                 true,
   328  	{"CR0-CR7", "ModRM:reg"}:     true,
   329  	{"CR8", ""}:                  true,
   330  	{"CS", "CS"}:                 true,
   331  	{"DR0-DR7", "ModRM:reg"}:     true,
   332  	{"DS", "DS"}:                 true,
   333  	{"DX", "DX"}:                 true,
   334  	{"EAX", "AL/AX/EAX/RAX"}:     true,
   335  	{"EAX", "AX/EAX/RAX"}:        true,
   336  	{"EAX", "EAX"}:               true,
   337  	{"ES", "ES"}:                 true,
   338  	{"FS", "FS"}:                 true,
   339  	{"GS", "GS"}:                 true,
   340  	{"RAX", "AL/AX/EAX/RAX"}:     true,
   341  	{"RAX", "AX/EAX/RAX"}:        true,
   342  	{"RAX", "RAX"}:               true,
   343  	{"ST", "ST(0)"}:              true,
   344  	{"ST(0)", "ST(0)"}:           true,
   345  	{"ST(i)", "ST(i)"}:           true,
   346  	{"Sreg", "ModRM:reg"}:        true,
   347  	{"bnd1", "ModRM:reg"}:        true,
   348  	{"bnd2/m128", "ModRM:r/m"}:   true,
   349  	{"bnd2/m64", "ModRM:r/m"}:    true,
   350  	{"imm16", "imm16"}:           true,
   351  	{"imm16", "imm8"}:            true,
   352  	{"imm16", "imm8/16/32"}:      true,
   353  	{"imm16", "imm8/16/32"}:      true,
   354  	{"imm16", "imm8/16/32/64"}:   true,
   355  	{"imm16", "iw"}:              true,
   356  	{"imm32", "imm8"}:            true,
   357  	{"imm32", "imm8/16/32"}:      true,
   358  	{"imm32", "imm8/16/32"}:      true,
   359  	{"imm32", "imm8/16/32/64"}:   true,
   360  	{"imm64", "imm8/16/32/64"}:   true,
   361  	{"imm8", "imm8"}:             true,
   362  	{"imm8", "imm8/16/32"}:       true,
   363  	{"imm8", "imm8/16/32"}:       true,
   364  	{"imm8", "imm8/16/32/64"}:    true,
   365  	{"imm8", "imm8[3:0]"}:        true,
   366  	{"m", "ModRM:r/m"}:           true,
   367  	{"m128", "ModRM:r/m"}:        true,
   368  	{"m14/28byte", "ModRM:r/m"}:  true,
   369  	{"m16", "ModRM:r/m"}:         true,
   370  	{"m16&16", "ModRM:r/m"}:      true,
   371  	{"m16&32", "ModRM:r/m"}:      true,
   372  	{"m16&64", "ModRM:r/m"}:      true,
   373  	{"m16:16", "ModRM:r/m"}:      true,
   374  	{"m16:16", "Offset"}:         true,
   375  	{"m16:32", "ModRM:r/m"}:      true,
   376  	{"m16:32", "Offset"}:         true,
   377  	{"m16:64", "ModRM:r/m"}:      true,
   378  	{"m16:64", "Offset"}:         true,
   379  	{"m16int", "ModRM:r/m"}:      true,
   380  	{"m256", "ModRM:r/m"}:        true,
   381  	{"m2byte", "ModRM:r/m"}:      true,
   382  	{"m32", "ModRM:r/m"}:         true,
   383  	{"m32&32", "ModRM:r/m"}:      true,
   384  	{"m32fp", "ModRM:r/m"}:       true,
   385  	{"m32int", "ModRM:r/m"}:      true,
   386  	{"m512byte", "ModRM:r/m"}:    true,
   387  	{"m64", "ModRM:r/m"}:         true,
   388  	{"m64fp", "ModRM:r/m"}:       true,
   389  	{"m64int", "ModRM:r/m"}:      true,
   390  	{"m8", "ModRM:r/m"}:          true,
   391  	{"m80bcd", "ModRM:r/m"}:      true,
   392  	{"m80dec", "ModRM:r/m"}:      true,
   393  	{"m80fp", "ModRM:r/m"}:       true,
   394  	{"m94/108byte", "ModRM:r/m"}: true,
   395  	{"mem", "ModRM:r/m"}:         true,
   396  	{"mib", "ModRM:r/m"}:         true,
   397  	{"mm/m32", "ModRM:r/m"}:      true,
   398  	{"mm1", "ModRM:reg"}:         true,
   399  	{"mm2", "ModRM:r/m"}:         true,
   400  	{"mm2/m32", "ModRM:r/m"}:     true,
   401  	{"mm2/m64", "ModRM:r/m"}:     true,
   402  	{"moffs16", "Moffs"}:         true,
   403  	{"moffs32", "Moffs"}:         true,
   404  	{"moffs64", "Moffs"}:         true,
   405  	{"moffs8", "Moffs"}:          true,
   406  	{"ptr16:16", "Offset"}:       true,
   407  	{"ptr16:32", "Offset"}:       true,
   408  	{"r/m16", "ModRM:r/m"}:       true,
   409  	{"r/m32", "ModRM:r/m"}:       true,
   410  	{"r/m64", "ModRM:r/m"}:       true,
   411  	{"r/m8", "ModRM:r/m"}:        true,
   412  	{"r16", "ModRM:reg"}:         true,
   413  	{"r16op", "opcode + rd"}:     true,
   414  	{"r32", "ModRM:reg"}:         true,
   415  	{"r32", "VEX.vvvv"}:          true,
   416  	{"r32/m16", "ModRM:r/m"}:     true,
   417  	{"r32/m8", "ModRM:r/m"}:      true,
   418  	{"r32V", "VEX.vvvv"}:         true,
   419  	{"r32op", "opcode + rd"}:     true,
   420  	{"r64", "ModRM:reg"}:         true,
   421  	{"r64/m16", "ModRM:r/m"}:     true,
   422  	{"r64V", "VEX.vvvv"}:         true,
   423  	{"r64op", "opcode + rd"}:     true,
   424  	{"r8", "ModRM:reg"}:          true,
   425  	{"r8op", "opcode + rd"}:      true,
   426  	{"rel16", "Offset"}:          true,
   427  	{"rel32", "Offset"}:          true,
   428  	{"rel8", "Offset"}:           true,
   429  	{"rmr16", "ModRM:r/m"}:       true,
   430  	{"rmr32", "ModRM:r/m"}:       true,
   431  	{"rmr64", "ModRM:r/m"}:       true,
   432  	{"xmm/m128", "ModRM:r/m"}:    true,
   433  	{"xmm/m32", "ModRM:r/m"}:     true,
   434  	{"xmm1", "ModRM:reg"}:        true,
   435  	{"xmm2", "ModRM:r/m"}:        true,
   436  	{"xmm2/m128", "ModRM:r/m"}:   true,
   437  	{"xmm2/m16", "ModRM:r/m"}:    true,
   438  	{"xmm2/m32", "ModRM:r/m"}:    true,
   439  	{"xmm2/m64", "ModRM:r/m"}:    true,
   440  	{"xmm2/m8", "ModRM:r/m"}:     true,
   441  	{"xmmIH", "imm8[7:4]"}:       true,
   442  	{"xmmV", "VEX.vvvv"}:         true,
   443  	{"ymm1", "ModRM:reg"}:        true,
   444  	{"ymm2", "ModRM:r/m"}:        true,
   445  	{"ymm2/m256", "ModRM:r/m"}:   true,
   446  	{"ymmIH", "imm8[7:4]"}:       true,
   447  	{"ymmV", "VEX.vvvv"}:         true,
   448  	{"vm32x", "vsib"}:            true,
   449  	{"vm64x", "vsib"}:            true,
   450  	{"vm32y", "vsib"}:            true,
   451  	{"vm64y", "vsib"}:            true,
   452  	{"SS", "SS"}:                 true,
   453  	{"3", "3"}:                   true,
   454  }
   455  
   456  // instBlacklist lists the instruction syntaxes to ignore when parsing.
   457  // We exclude Intel's general forms for these not-actually-general instructions.
   458  // The syntax makes it look like arbitrary memory operands can be used when in fact
   459  // the exact address is fixed in all cases - [DI] or [SI], for example
   460  var instBlacklist = map[string]bool{
   461  	"CMPS m16, m16":       true,
   462  	"CMPS m32, m32":       true,
   463  	"CMPS m64, m64":       true,
   464  	"CMPS m8, m8":         true,
   465  	"INS m16, DX":         true,
   466  	"INS m32, DX":         true,
   467  	"INS m8, DX":          true,
   468  	"LODS m16":            true,
   469  	"LODS m32":            true,
   470  	"LODS m64":            true,
   471  	"LODS m8":             true,
   472  	"MOVS m16, m16":       true,
   473  	"MOVS m32, m32":       true,
   474  	"MOVS m64, m64":       true,
   475  	"MOVS m8, m8":         true,
   476  	"OUTS DX, m16":        true,
   477  	"OUTS DX, m32":        true,
   478  	"OUTS DX, m8":         true,
   479  	"REP INS m16, DX":     true,
   480  	"REP INS m32, DX":     true,
   481  	"REP INS m8, DX":      true,
   482  	"REP INS r/m32, DX":   true,
   483  	"REP LODS AL":         true,
   484  	"REP LODS AX":         true,
   485  	"REP LODS EAX":        true,
   486  	"REP LODS RAX":        true,
   487  	"REP MOVS m16, m16":   true,
   488  	"REP MOVS m32, m32":   true,
   489  	"REP MOVS m64, m64":   true,
   490  	"REP MOVS m8, m8":     true,
   491  	"REP OUTS DX, m16":    true,
   492  	"REP OUTS DX, m32":    true,
   493  	"REP OUTS DX, m8":     true,
   494  	"REP OUTS DX, r/m16":  true,
   495  	"REP OUTS DX, r/m32":  true,
   496  	"REP OUTS DX, r/m8":   true,
   497  	"REP STOS m16":        true,
   498  	"REP STOS m32":        true,
   499  	"REP STOS m64":        true,
   500  	"REP STOS m8":         true,
   501  	"REPE CMPS m16, m16":  true,
   502  	"REPE CMPS m32, m32":  true,
   503  	"REPE CMPS m64, m64":  true,
   504  	"REPE CMPS m8, m8":    true,
   505  	"REPE SCAS m16":       true,
   506  	"REPE SCAS m32":       true,
   507  	"REPE SCAS m64":       true,
   508  	"REPE SCAS m8":        true,
   509  	"REPNE CMPS m16, m16": true,
   510  	"REPNE CMPS m32, m32": true,
   511  	"REPNE CMPS m64, m64": true,
   512  	"REPNE CMPS m8, m8":   true,
   513  	"REPNE SCAS m16":      true,
   514  	"REPNE SCAS m32":      true,
   515  	"REPNE SCAS m64":      true,
   516  	"REPNE SCAS m8":       true,
   517  	"SCAS m16":            true,
   518  	"SCAS m32":            true,
   519  	"SCAS m64":            true,
   520  	"SCAS m8":             true,
   521  	"STOS m16":            true,
   522  	"STOS m32":            true,
   523  	"STOS m64":            true,
   524  	"STOS m8":             true,
   525  	"XLAT m8":             true,
   526  }
   527  
   528  // condPrefs lists preferences for condition code suffixes.
   529  // The first suffix in each pair takes priority over the second.
   530  var condPrefs = [][2]string{
   531  	{"B", "C"},
   532  	{"B", "NAE"},
   533  	{"AE", "NB"},
   534  	{"AE", "NC"},
   535  	{"E", "Z"},
   536  	{"NE", "NZ"},
   537  	{"BE", "NA"},
   538  	{"A", "NBE"},
   539  	{"P", "PE"},
   540  	{"NP", "PO"},
   541  	{"L", "NGE"},
   542  	{"GE", "NL"},
   543  	{"LE", "NG"},
   544  	{"G", "NLE"},
   545  }
   546  
   547  // conv16 specifies replacements to turn a 16-bit syntax into a 32-bit syntax.
   548  // If the conv16 can be applied to one form to create a new form with the same
   549  // fixed instruction prefix, the pair is tagged as operand16 and operand32
   550  // respectively.
   551  var conv16 = strings.NewReplacer(
   552  	"16:16", "16:32",
   553  	"16", "32",
   554  	"AX", "EAX",
   555  	"CBW", "CWDE",
   556  	"CMPSW", "CMPSD",
   557  	"CWD", "CDQ",
   558  	"INSW", "INSD",
   559  	"IRET", "IRETD",
   560  	"LODSW", "LODSD",
   561  	"MOVSW", "MOVSD",
   562  	"OUTSW", "OUTSD",
   563  	"POPA", "POPAD",
   564  	"POPF", "POPFD",
   565  	"PUSHA", "PUSHAD",
   566  	"PUSHF", "PUSHFD",
   567  	"SCASW", "SCASD",
   568  	"STOSW", "STOSD",
   569  )
   570  
   571  // fixup records additional modifications needed that are not derived
   572  // from the instructions in the manual. It is keyed by the syntax and opcode.
   573  var fixup = map[[2]string][]fixer{
   574  	// NOP is a very special case overloading XCHG AX, AX.
   575  	// The decoder handles it in custom code; exclude from the usual tables.
   576  	{"NOP", "90"}: {fixAddTag("pseudo")},
   577  
   578  	// PAUSE is a special case of NOP.
   579  	{"PAUSE", "F3 90"}: {fixAddTag("pseudo")}, // used to add 'keepop' tag but not sure what that means
   580  
   581  	// Far CALL, JMP, RET are given L prefix (long) for disambiguation.
   582  	{"CALL m16:16", "FF /3"}:       {fixRename("CALL_FAR")},
   583  	{"CALL m16:32", "FF /3"}:       {fixRename("CALL_FAR")},
   584  	{"CALL m16:64", "REX.W FF /3"}: {fixRename("CALL_FAR")},
   585  	{"CALL ptr16:16", "9A cd"}:     {fixRename("CALL_FAR")},
   586  	{"CALL ptr16:32", "9A cp"}:     {fixRename("CALL_FAR")},
   587  	{"JMP m16:16", "FF /5"}:        {fixRename("JMP_FAR")},
   588  	{"JMP m16:32", "FF /5"}:        {fixRename("JMP_FAR")},
   589  	{"JMP m16:64", "REX.W FF /5"}:  {fixRename("JMP_FAR")},
   590  	{"JMP ptr16:16", "EA cd"}:      {fixRename("JMP_FAR")},
   591  	{"JMP ptr16:32", "EA cp"}:      {fixRename("JMP_FAR")},
   592  	{"RET imm16", "CA iw"}:         {fixRename("RET_FAR"), fixArg(0, "imm16u")},
   593  	{"RET", "CB"}:                  {fixRename("RET_FAR")},
   594  
   595  	// Unsigned immediates. (RET far imm16 handled above.)
   596  	// Some of these are just preferences for disassembling.
   597  	{"ENTER imm16, imm8", "C8 iw ib"}:  {fixArg(1, "imm8b")},
   598  	{"RET imm16", "C2 iw"}:             {fixArg(0, "imm16u")},
   599  	{"IN AL, imm8", "E4 ib"}:           {fixArg(1, "imm8u")},
   600  	{"IN AX, imm8", "E5 ib"}:           {fixArg(1, "imm8u")},
   601  	{"IN EAX, imm8", "E5 ib"}:          {fixArg(1, "imm8u"), fixAddTag("operand64")},
   602  	{"OUT imm8, AL", "E6 ib"}:          {fixArg(0, "imm8u")},
   603  	{"OUT imm8, AX", "E7 ib"}:          {fixArg(0, "imm8u")},
   604  	{"OUT imm8, EAX", "E7 ib"}:         {fixArg(0, "imm8u"), fixAddTag("operand64")},
   605  	{"MOV r8op, imm8", "B0+rb ib"}:     {fixArg(1, "imm8u")},
   606  	{"MOV r8op, imm8", "REX B0+rb ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
   607  	{"MOV r/m8, imm8", "C6 /0 ib"}:     {fixArg(1, "imm8u")},
   608  	{"MOV r/m8, imm8", "REX C6 /0 ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
   609  
   610  	// The listings for MOVSX and MOVSXD do not list some variants that
   611  	// assemblers seem to allow.
   612  	// As a result, this instruction got the wrong tag.
   613  	// The other instructions are listed in extraInsts.
   614  	{"MOVSX r32, r/m16", "0F BF /r"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
   615  	{"MOVZX r32, r/m16", "0F B7 /r"}: {fixRemoveTag("operand16")},
   616  
   617  	// Listings are incomplete or incorrect. Fix tags to adjust for new instructions below.
   618  	{"SLDT r/m16", "0F 00 /0"}:             {fixRemoveTag("operand32")},
   619  	{"STR r/m16", "0F 00 /1"}:              {fixAddTag("operand16")},
   620  	{"BSWAP r32op", "0F C8+rd"}:            {fixRemoveTag("operand16")},
   621  	{"MOV Sreg, r/m16", "8E /r"}:           {fixRemoveTag("operand32")},
   622  	{"MOV Sreg, r/m64", "REX.W 8E /r"}:     {fixArg(1, "r/m16")},
   623  	{"MOV r/m64, Sreg", "REX.W 8C /r"}:     {fixArg(0, "r/m16")},
   624  	{"MOV r/m16, Sreg", "8C /r"}:           {fixRemoveTag("operand32")},
   625  	{"MOV r/m64, imm32", "REX.W C7 /0 io"}: {fixOpcode("REX.W C7 /0 id")},
   626  
   627  	// On 64-bit, these ignore 64-bit mode change.
   628  	{"POP FS", "0F A1"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
   629  	{"POP GS", "0F A9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
   630  	{"LEAVE", "C9"}:     {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
   631  
   632  	{"IN EAX, DX", "ED"}:         {fixAddTag("operand64")},
   633  	{"INSD", "6D"}:               {fixAddTag("operand64")},
   634  	{"OUT DX, EAX", "EF"}:        {fixAddTag("operand64")},
   635  	{"OUTSD", "6F"}:              {fixAddTag("operand64")},
   636  	{"XBEGIN rel32", "C7 F8 cd"}: {fixAddTag("operand64")},
   637  
   638  	// Treat FWAIT, not WAIT, as canonical.
   639  	{"FWAIT", "9B"}: {fixRemoveTag("pseudo")},
   640  	{"WAIT", "9B"}:  {fixAddTag("pseudo")},
   641  
   642  	// LAHF and SAHF are listed as "Invalid*" for 64-bit mode.
   643  	// They are actually defined, so Valid from our point of view.
   644  	// It's just that only a very few 64-bit processors allowed them.
   645  	{"LAHF", "9F"}: {fixValid("V", "V")},
   646  	{"SAHF", "9E"}: {fixValid("V", "V")},
   647  
   648  	// The JZ forms are listed twice in the table, which confuses things.
   649  	{"JZ rel16", "0F 84 cw"}: {fixAddTag("operand16"), fixRemoveTag("operand32")},
   650  	{"JZ rel32", "0F 84 cd"}: {fixAddTag("operand32"), fixRemoveTag("operand16")},
   651  
   652  	// XCHG has two of every instruction, which makes things bad.
   653  	// The XX hack below takes care of most problems but this one remains.
   654  	{"XCHG r/m16, r16", "87 /r"}: {fixRemoveTag("pseudo")},
   655  
   656  	// MOV CR8 is just the obvious extension of the MOV CR0-CR7 form.
   657  	{"MOV rmr64, CR8", "REX.R + 0F 20 /0"}: {fixAddTag("pseudo")},
   658  	{"MOV CR8, rmr64", "REX.R + 0F 22 /0"}: {fixAddTag("pseudo")},
   659  
   660  	// TODO: EXPLAIN ALL THESE
   661  	{"ADCX r32, r/m32", "66 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
   662  	{"ADOX r32, r/m32", "F3 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
   663  	{"POPFQ", "9D"}:                       {fixAddTag("operand32"), fixAddTag("operand64")},
   664  	{"PUSHFQ", "9C"}:                      {fixAddTag("operand32"), fixAddTag("operand64")},
   665  	{"JCXZ rel8", "E3 cb"}:                {fixAddTag("address16")},
   666  	{"JECXZ rel8", "E3 cb"}:               {fixAddTag("address32")},
   667  	{"JRCXZ rel8", "E3 cb"}:               {fixAddTag("address64")},
   668  	{"PUSH r64op", "50+rd"}:               {fixAddTag("operand32"), fixAddTag("operand64")},
   669  	{"PUSH r/m64", "FF /6"}:               {fixAddTag("operand32"), fixAddTag("operand64")},
   670  	{"POP r64op", "58+rd"}:                {fixAddTag("operand32"), fixAddTag("operand64")},
   671  	{"POP r/m64", "8F /0"}:                {fixAddTag("operand32"), fixAddTag("operand64")},
   672  	{"SMSW r/m16", "0F 01 /4"}:            {fixAddTag("operand16")},
   673  	{"SMSW r32/m16", "0F 01 /4"}:          {fixRemoveTag("operand16"), fixAddTag("operand32")},
   674  
   675  	// Express to the decoder that the rel16 only applies in 16-bit operand mode.
   676  	{"JA rel16", "0F 87 cw"}:  {fixAddTag("operand16")},
   677  	{"JAE rel16", "0F 83 cw"}: {fixAddTag("operand16")},
   678  	{"JB rel16", "0F 82 cw"}:  {fixAddTag("operand16")},
   679  	{"JBE rel16", "0F 86 cw"}: {fixAddTag("operand16")},
   680  	{"JE rel16", "0F 84 cw"}:  {fixAddTag("operand16")},
   681  	{"JG rel16", "0F 8F cw"}:  {fixAddTag("operand16")},
   682  	{"JGE rel16", "0F 8D cw"}: {fixAddTag("operand16")},
   683  	{"JL rel16", "0F 8C cw"}:  {fixAddTag("operand16")},
   684  	{"JLE rel16", "0F 8E cw"}: {fixAddTag("operand16")},
   685  	{"JNE rel16", "0F 85 cw"}: {fixAddTag("operand16")},
   686  	{"JNO rel16", "0F 81 cw"}: {fixAddTag("operand16")},
   687  	{"JNP rel16", "0F 8B cw"}: {fixAddTag("operand16")},
   688  	{"JNS rel16", "0F 89 cw"}: {fixAddTag("operand16")},
   689  	{"JO rel16", "0F 80 cw"}:  {fixAddTag("operand16")},
   690  	{"JP rel16", "0F 8A cw"}:  {fixAddTag("operand16")},
   691  	{"JS rel16", "0F 88 cw"}:  {fixAddTag("operand16")},
   692  
   693  	{"JA rel32", "0F 87 cd"}:  {fixAddTag("operand32")},
   694  	{"JAE rel32", "0F 83 cd"}: {fixAddTag("operand32")},
   695  	{"JB rel32", "0F 82 cd"}:  {fixAddTag("operand32")},
   696  	{"JBE rel32", "0F 86 cd"}: {fixAddTag("operand32")},
   697  	{"JE rel32", "0F 84 cd"}:  {fixAddTag("operand32")},
   698  	{"JG rel32", "0F 8F cd"}:  {fixAddTag("operand32")},
   699  	{"JGE rel32", "0F 8D cd"}: {fixAddTag("operand32")},
   700  	{"JL rel32", "0F 8C cd"}:  {fixAddTag("operand32")},
   701  	{"JLE rel32", "0F 8E cd"}: {fixAddTag("operand32")},
   702  	{"JNE rel32", "0F 85 cd"}: {fixAddTag("operand32")},
   703  	{"JNO rel32", "0F 81 cd"}: {fixAddTag("operand32")},
   704  	{"JNP rel32", "0F 8B cd"}: {fixAddTag("operand32")},
   705  	{"JNS rel32", "0F 89 cd"}: {fixAddTag("operand32")},
   706  	{"JO rel32", "0F 80 cd"}:  {fixAddTag("operand32")},
   707  	{"JP rel32", "0F 8A cd"}:  {fixAddTag("operand32")},
   708  	{"JS rel32", "0F 88 cd"}:  {fixAddTag("operand32")},
   709  
   710  	{"LSL r16, r/m16", "0F 03 /r"}: {fixAddTag("operand16")},
   711  }
   712  
   // extraInsts lists instruction forms that are not taken from the parsed
   // manual tables. cleanup appends them to its result after every other
   // step has run, so the fixup table is not applied to them and their tags
   // are kept exactly as written here. When onlySomePages is set, cleanup
   // appends an entry only if its operation name (the first result of
   // splitSyntax) was recorded in haveOp while processing the parsed
   // instructions.
   713  var extraInsts = []*instruction{
   714  	// Undocumented.
   715  	{syntax: "ICEBP", opcode: "F1", valid32: "V", valid64: "V"},
   716  	{syntax: "UD1", opcode: "0F B9", valid32: "V", valid64: "V"},
   717  	{syntax: "FFREEP ST(i)", opcode: "DF C0+i", valid32: "V", valid64: "V", action: "w"},
   718  
   719  	// Where did these come from? They were in version 0.01 of the csv table.
   720  	{syntax: "MOVNTSD m64, xmm1", opcode: "F2 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
   721  	{syntax: "MOVNTSS m32, xmm1", opcode: "F3 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
   722  
   723  	// These express to the decoder that in 64-bit mode
   724  	// an operand prefix does not affect the size of the relative offset.
   725  	{syntax: "CALL rel32", opcode: "E8 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   726  	{syntax: "JMP rel32", opcode: "E9 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   727  	{syntax: "JA rel32", opcode: "0F 87 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   728  	{syntax: "JAE rel32", opcode: "0F 83 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   729  	{syntax: "JB rel32", opcode: "0F 82 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   730  	{syntax: "JBE rel32", opcode: "0F 86 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   731  	{syntax: "JE rel32", opcode: "0F 84 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   732  	{syntax: "JG rel32", opcode: "0F 8F cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   733  	{syntax: "JGE rel32", opcode: "0F 8D cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   734  	{syntax: "JL rel32", opcode: "0F 8C cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   735  	{syntax: "JLE rel32", opcode: "0F 8E cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   736  	{syntax: "JNE rel32", opcode: "0F 85 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   737  	{syntax: "JNO rel32", opcode: "0F 81 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   738  	{syntax: "JNP rel32", opcode: "0F 8B cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   739  	{syntax: "JNS rel32", opcode: "0F 89 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   740  	{syntax: "JO rel32", opcode: "0F 80 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   741  	{syntax: "JP rel32", opcode: "0F 8A cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   742  	{syntax: "JS rel32", opcode: "0F 88 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
   743  
   744  	// Disassemblers recognize these, but they're not in the manual.
   745  	// Not sure if they really exist.
   746  
   747  	// The 16-16 and 32-32 forms don't really make sense since there's nothing to extend.
   748  	{syntax: "MOVSX r16, r/m16", opcode: "0F BF /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
   749  	{syntax: "MOVSXD r16, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
   750  	{syntax: "MOVSXD r32, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
   751  	{syntax: "MOVZX r16, r/m16", opcode: "0F B7 /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
   752  
   753  	{syntax: "LAR r64, r/m16", opcode: "REX.W 0F 02 /r", valid32: "N.E.", valid64: "V", action: "w,r"},
   754  	{syntax: "SLDT r32/m16", opcode: "0F 00 /0", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
   755  	{syntax: "STR r32/m16", opcode: "0F 00 /1", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
   756  	{syntax: "STR r64/m16", opcode: "REX.W 0F 00 /1", valid32: "N.E.", valid64: "V", action: "w"},
   757  
   758  	{syntax: "BSWAP r16op", opcode: "0F C8+rd", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "rw"},
   759  
   760  	// Do these exist?
   761  	// I am not sure where they came from, and xed doesn't recognize them.
   762  	//{syntax: "MOV TR0-TR7, rmr32", opcode: "0F 26 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
   763  	//{syntax: "MOV TR0-TR7, rmr64", opcode: "0F 26 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
   764  	//{syntax: "MOV rmr32, TR0-TR7", opcode: "0F 24 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
   765  	//{syntax: "MOV rmr64, TR0-TR7", opcode: "0F 24 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
   766  	{syntax: "MOV Sreg, r32/m16", opcode: "8E /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
   767  	{syntax: "MOV r/m32, Sreg", opcode: "8C /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
   768  }
   769  
   // fixer is a manual correction that modifies a single instruction in place.
   // cleanup runs, in order, the fixers registered in the fixup table under
   // the instruction's (syntax, opcode) pair.
   770  type fixer func(*instruction)
   771  
   772  func fixAddTag(tag string) fixer {
   773  	return func(inst *instruction) {
   774  		addTag(inst, tag)
   775  	}
   776  }
   777  
   778  func fixRemoveTag(tag string) fixer {
   779  	return func(inst *instruction) {
   780  		removeTag(inst, tag)
   781  	}
   782  }
   783  
   784  func fixRename(op string) fixer {
   785  	return func(inst *instruction) {
   786  		_, args := splitSyntax(inst.syntax)
   787  		inst.syntax = joinSyntax(op, args)
   788  	}
   789  }
   790  
   791  func fixArg(i int, arg string) fixer {
   792  	return func(inst *instruction) {
   793  		op, args := splitSyntax(inst.syntax)
   794  		args[i] = arg
   795  		inst.syntax = joinSyntax(op, args)
   796  	}
   797  }
   798  
   799  func fixIfValid(valid32, valid64 string, fix fixer) fixer {
   800  	return func(inst *instruction) {
   801  		if inst.valid32 == valid32 && inst.valid64 == valid64 {
   802  			fix(inst)
   803  		}
   804  	}
   805  }
   806  
   807  func fixValid(valid32, valid64 string) fixer {
   808  	return func(inst *instruction) {
   809  		inst.valid32 = valid32
   810  		inst.valid64 = valid64
   811  	}
   812  }
   813  
   814  func fixOpcode(opcode string) fixer {
   815  	return func(inst *instruction) {
   816  		inst.opcode = opcode
   817  	}
   818  }
   819  
   820  func cleanup(insts []*instruction) []*instruction {
   821  	var haveOp map[string]bool
   822  	if onlySomePages {
   823  		haveOp = map[string]bool{}
   824  	}
   825  
   826  	// Clean individual instruction encodings and opcode sequences.
   827  	sawJZ := map[string]bool{}
   828  	out := insts[:0]
   829  	for seq, inst := range insts {
   830  		inst.seq = seq
   831  
   832  		// There are two copies each of JZ rel16 and JZ rel32. Delete the second.
   833  		if strings.HasPrefix(inst.syntax, "JZ rel") {
   834  			if sawJZ[inst.syntax] {
   835  				continue
   836  			}
   837  			sawJZ[inst.syntax] = true
   838  		}
   839  		out = append(out, inst)
   840  
   841  		// Intel CMPXCHG16B and CMPXCHG8B have surprise "m64" or " m128" at end of encoding.
   842  		surprises := []string{
   843  			" m64",
   844  			" m128",
   845  		}
   846  		for _, s := range surprises {
   847  			if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) {
   848  				inst.opcode = strings.TrimSuffix(inst.opcode, s)
   849  			}
   850  		}
   851  
   852  		op, args := splitSyntax(inst.syntax)
   853  		op = strings.TrimRight(op, "*")
   854  		inst.syntax = joinSyntax(op, args)
   855  
   856  		// Check argument names in syntax against encoding details.
   857  		if enc, ok := encodings[inst.syntax]; ok {
   858  			inst.args = enc
   859  		}
   860  		if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" {
   861  			fixed := make([]string, len(args))
   862  			copy(fixed, inst.args)
   863  			fixed[len(args)-1] = "imm8"
   864  			inst.args = fixed
   865  		} else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" {
   866  			inst.args = []string{}
   867  		} else if len(args) != len(inst.args) {
   868  			fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; "))
   869  			inst.syntax = joinSyntax(op, args)
   870  			continue
   871  		}
   872  
   873  		var action []string
   874  		for i, arg := range args {
   875  			arg = strings.TrimSpace(arg)
   876  			arg = strings.TrimRight(arg, "*")
   877  			if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") {
   878  				arg = "r32" + strings.TrimPrefix(arg, "reg")
   879  			}
   880  
   881  			enc := inst.args[i]
   882  			enc = strings.TrimSpace(enc)
   883  			switch {
   884  			case strings.HasSuffix(enc, " (r))"):
   885  				enc = strings.TrimSuffix(enc, ")")
   886  			case strings.HasSuffix(enc, " (R)"):
   887  				enc = strings.TrimSuffix(enc, " (R)") + " (r)"
   888  			case strings.HasSuffix(enc, " (W)"):
   889  				enc = strings.TrimSuffix(enc, " (W)") + " (w)"
   890  			case strings.HasSuffix(enc, " (r,w)"):
   891  				enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)"
   892  			case enc == "Imm8":
   893  				enc = "imm8"
   894  			case enc == "imm8/26/32":
   895  				enc = "imm8/16/32"
   896  			case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index":
   897  				enc = "vsib (r)"
   898  			}
   899  			inst.args[i] = enc
   900  
   901  			switch {
   902  			case strings.HasSuffix(enc, " (r)"):
   903  				action = append(action, "r")
   904  				enc = strings.TrimSuffix(enc, " (r)")
   905  			case strings.HasSuffix(enc, " (w)"):
   906  				action = append(action, "w")
   907  				enc = strings.TrimSuffix(enc, " (w)")
   908  			case strings.HasSuffix(enc, " (r, w)"):
   909  				action = append(action, "rw")
   910  				enc = strings.TrimSuffix(enc, " (r, w)")
   911  			case strings.HasPrefix(enc, "imm"), enc == "Offset", enc == "iw", arg == "1", arg == "0", arg == "3":
   912  				action = append(action, "r")
   913  			case i < len(opAction[op]):
   914  				action = append(action, opAction[op][i])
   915  			default:
   916  				fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg)
   917  				action = append(action, "?")
   918  			}
   919  
   920  			if arg == "mem" && op == "LDDQU" {
   921  				arg = "m128"
   922  			}
   923  			if arg == "reg" && op == "LAR" {
   924  				arg = "r32"
   925  			}
   926  			if actual := encodeReplace[[2]string{arg, enc}]; actual != "" {
   927  				arg = actual
   928  			}
   929  
   930  			if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" {
   931  				addTag(inst, "modrm_regonly")
   932  				arg = "rmr" + arg[1:]
   933  			}
   934  			if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" {
   935  				addTag(inst, "modrm_regonly")
   936  			}
   937  
   938  			if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" {
   939  				addTag(inst, "modrm_memonly")
   940  			}
   941  
   942  			if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") {
   943  				arg = "rmr64"
   944  				addTag(inst, "modrm_regonly")
   945  			}
   946  			if arg == "CR8" {
   947  				enc = ""
   948  			}
   949  
   950  			if !encodeOK[[2]string{arg, enc}] {
   951  				fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc)
   952  			}
   953  
   954  			args[i] = arg
   955  
   956  			// Intel SETcc and others are missing the /r.
   957  			// But CALL rel16 and CALL rel32 have a bad encoding table so ignore the ModRM there.
   958  			if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" {
   959  				inst.opcode += " /r"
   960  			}
   961  			if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") {
   962  				// The opcode is taken up with something else. Bug in table.
   963  				fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode)
   964  			}
   965  			// XBEGIN is missing cw cd.
   966  			if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") {
   967  				inst.opcode += " cw"
   968  			}
   969  			if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") {
   970  				inst.opcode += " cd"
   971  			}
   972  			if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") {
   973  				inst.opcode += " cm"
   974  			}
   975  
   976  			inst.action = strings.Join(action, ",")
   977  		}
   978  
   979  		inst.syntax = joinSyntax(op, args)
   980  
   981  		// The Intel manual lists each XCHG form with arguments in both orders.
   982  		// While this is technically correct, it confuses lots of the analysis.
   983  		// Change half of them to start with a fake "XX" byte.
   984  		if op == "XCHG" && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") {
   985  			inst.opcode = "XX " + inst.opcode
   986  		}
   987  
   988  		// Intel manual is not great about disabling REX instructions on 32-bit systems.
   989  		if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" {
   990  			inst.valid32 = "N.E."
   991  		}
   992  
   993  		if inst.valid32 == "V" {
   994  			switch {
   995  			case containsAll(inst.compat, "not supported", "earlier than the Intel486"):
   996  				inst.cpuid = "486"
   997  			case containsAll(inst.compat, "not supported", "earlier than the Pentium"),
   998  				containsAll(inst.compat, "were introduced", "with the Pentium"):
   999  				inst.cpuid = "Pentium"
  1000  			case containsAll(inst.compat, "were introduced", "in the Pentium II"):
  1001  				inst.cpuid = "PentiumII"
  1002  			case containsAll(inst.compat, "were introduced", "in the P6 family"),
  1003  				containsAll(inst.compat, "were introduced in P6 family"):
  1004  				addTag(inst, "P6")
  1005  			}
  1006  		}
  1007  
  1008  		if onlySomePages {
  1009  			op, _ := splitSyntax(inst.syntax)
  1010  			haveOp[op] = true
  1011  		}
  1012  	}
  1013  
  1014  	insts = out
  1015  	sort.Sort(byOpcode(insts))
  1016  
  1017  	// Detect operand size dependencies.
  1018  	var last *instruction
  1019  	for _, inst := range insts {
  1020  		if last != nil {
  1021  			f1, _ := splitOpcode(last.opcode)
  1022  			f2, _ := splitOpcode(inst.opcode)
  1023  			if f1 == f2 {
  1024  				// Conflict: cannot distinguish instructions based on fixed prefix.
  1025  				if is16vs32pair(last, inst) {
  1026  					addTag(last, "operand16")
  1027  					addTag(inst, "operand32")
  1028  					continue
  1029  				}
  1030  				if is16vs32pair(inst, last) {
  1031  					addTag(last, "operand32")
  1032  					addTag(inst, "operand16")
  1033  					last = inst
  1034  					continue
  1035  				}
  1036  			}
  1037  		}
  1038  		last = inst
  1039  	}
  1040  
  1041  	// Detect pseudo-ops, defined as opcode entries subsumed by more general ones.
  1042  	seen := map[string]*instruction{}
  1043  	for _, inst := range insts {
  1044  		if strings.HasPrefix(inst.opcode, "9B ") { // FWAIT prefix
  1045  			addTag(inst, "pseudo")
  1046  			continue
  1047  		}
  1048  		if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" {
  1049  			addTag(inst, "pseudo")
  1050  			continue
  1051  		}
  1052  		if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") {
  1053  			addTag(inst, "pseudo")
  1054  			continue
  1055  		}
  1056  		if strings.HasPrefix(inst.syntax, "SAL ") { // SHL is canonical
  1057  			addTag(inst, "pseudo")
  1058  			continue
  1059  		}
  1060  		if old := seen[inst.opcode]; old != nil {
  1061  			if condLess(old.syntax, inst.syntax) {
  1062  				addTag(inst, "pseudo")
  1063  				continue
  1064  			}
  1065  			if xchgLess(inst.syntax, old.syntax) {
  1066  				old.tags = append(old.tags, "pseudo")
  1067  				seen[inst.opcode] = inst
  1068  				continue
  1069  			}
  1070  		}
  1071  
  1072  		seen[inst.opcode] = inst
  1073  
  1074  		if last != nil && canGenerate(last.opcode, inst.opcode) {
  1075  			addTag(inst, "pseudo")
  1076  			continue
  1077  		}
  1078  		last = inst
  1079  	}
  1080  	for _, inst := range insts {
  1081  		if strings.Contains(inst.opcode, "REX ") {
  1082  			if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax {
  1083  				addTag(inst, "pseudo64")
  1084  				continue
  1085  			} else if old != nil && hasTag(old, "pseudo") {
  1086  				addTag(inst, "pseudo")
  1087  				continue
  1088  			}
  1089  		}
  1090  		if strings.Contains(inst.opcode, "REX.W ") {
  1091  			if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax {
  1092  				addTag(old, "ignoreREXW")
  1093  				addTag(inst, "pseudo")
  1094  				continue
  1095  			} else if old != nil && hasTag(old, "pseudo") {
  1096  				addTag(inst, "pseudo")
  1097  				continue
  1098  			} else if old != nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") {
  1099  				// There is a 64-bit form of this instruction.
  1100  				// Mark this one as only valid in the non-64-bit operand modes.
  1101  				addTag(old, "operand16")
  1102  				addTag(old, "operand32")
  1103  				continue
  1104  			}
  1105  		}
  1106  	}
  1107  
  1108  	// Undo XCHG hack above.
  1109  	for _, inst := range insts {
  1110  		if strings.HasPrefix(inst.opcode, "XX ") {
  1111  			inst.opcode = strings.TrimPrefix(inst.opcode, "XX ")
  1112  			addTag(inst, "pseudo")
  1113  			removeTag(inst, "pseudo64")
  1114  		}
  1115  	}
  1116  
  1117  	// Last ditch effort. Manual fixes.
  1118  	// Some things are too hard to infer.
  1119  	for _, inst := range insts {
  1120  		for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] {
  1121  			fix(inst)
  1122  		}
  1123  		sort.Strings(inst.tags)
  1124  	}
  1125  
  1126  	sort.Sort(bySeq(insts))
  1127  
  1128  	if onlySomePages {
  1129  		for _, inst := range extraInsts {
  1130  			op, _ := splitSyntax(inst.syntax)
  1131  			if haveOp[op] {
  1132  				insts = append(insts, inst)
  1133  			}
  1134  		}
  1135  	} else {
  1136  		insts = append(insts, extraInsts...)
  1137  	}
  1138  	return insts
  1139  }
  1140  
  1141  func hasTag(inst *instruction, tag string) bool {
  1142  	for _, t := range inst.tags {
  1143  		if t == tag {
  1144  			return true
  1145  		}
  1146  	}
  1147  	return false
  1148  }
  1149  
  1150  func removeTag(inst *instruction, tag string) {
  1151  	if !hasTag(inst, tag) {
  1152  		return
  1153  	}
  1154  	out := inst.tags[:0]
  1155  	for _, t := range inst.tags {
  1156  		if t != tag {
  1157  			out = append(out, t)
  1158  		}
  1159  	}
  1160  	inst.tags = out
  1161  }
  1162  
  1163  func addTag(inst *instruction, tag string) {
  1164  	if !hasTag(inst, tag) {
  1165  		inst.tags = append(inst.tags, tag)
  1166  	}
  1167  }
  1168  
  1169  type byOpcode []*instruction
  1170  
  1171  func (x byOpcode) Len() int      { return len(x) }
  1172  func (x byOpcode) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  1173  func (x byOpcode) Less(i, j int) bool {
  1174  	if x[i].opcode != x[j].opcode {
  1175  		return opcodeLess(x[i].opcode, x[j].opcode)
  1176  	}
  1177  	if condLess(x[i].syntax, x[j].syntax) {
  1178  		return true
  1179  	}
  1180  	if condLess(x[j].syntax, x[i].syntax) {
  1181  		return false
  1182  	}
  1183  	if x[i].syntax != x[j].syntax {
  1184  		return x[i].syntax < x[j].syntax
  1185  	}
  1186  	return x[i].seq < x[j].seq
  1187  }
  1188  
  1189  type bySeq []*instruction
  1190  
  1191  func (x bySeq) Len() int      { return len(x) }
  1192  func (x bySeq) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  1193  func (x bySeq) Less(i, j int) bool {
  1194  	return x[i].seq < x[j].seq
  1195  }
  1196  
  1197  type bySyntax []*instruction
  1198  
  1199  func (x bySyntax) Len() int      { return len(x) }
  1200  func (x bySyntax) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  1201  func (x bySyntax) Less(i, j int) bool {
  1202  	if x[i].syntax != x[j].syntax {
  1203  		return x[i].syntax < x[j].syntax
  1204  	}
  1205  	return x[i].opcode < x[j].opcode
  1206  }
  1207  
  1208  // condLess reports whether the conditional instruction syntax
  1209  // x should be considered less than y.
  1210  // We sort condition codes we prefer ahead of condition codes we don't,
  1211  // so that the latter are recorded as the pseudo-operations.
  1212  func condLess(x, y string) bool {
  1213  	x, _ = splitSyntax(x)
  1214  	y, _ = splitSyntax(y)
  1215  	for _, pref := range condPrefs {
  1216  		if strings.HasSuffix(x, pref[0]) && strings.HasSuffix(y, pref[1]) && strings.TrimSuffix(x, pref[0]) == strings.TrimSuffix(y, pref[1]) {
  1217  			return true
  1218  		}
  1219  	}
  1220  	return false
  1221  }
  1222  
  // xchgLess reports whether the xchg instruction x should be considered
  // less than y. That holds only when x starts with "XCHG " and x comes
  // after y in plain string order.
  func xchgLess(x, y string) bool {
  	if !strings.HasPrefix(x, "XCHG ") {
  		return false
  	}
  	return y < x
  }
  1227  
  // opcodeLess reports whether opcode string x should be considered less
  // than y. The strings are compared byte by byte; at the first difference:
  //
  //   - 'w' sorts before 'd' (word before doubleword);
  //   - otherwise lower-case letters sort before all other bytes, so that
  //     wildcard fields like "ib" come before literal bytes like "0A".
  //
  // When one string is a prefix of the other, the shorter one is less.
  func opcodeLess(x, y string) bool {
  	shared := len(x)
  	if len(y) < shared {
  		shared = len(y)
  	}
  	for i := 0; i < shared; i++ {
  		cx, cy := x[i], y[i]
  		if cx == cy {
  			continue
  		}
  		switch {
  		case cx == 'w' && cy == 'd':
  			return true
  		case cx == 'd' && cy == 'w':
  			return false
  		}
  		// Byte subtraction wraps around, so 'a'..'z' map to 0..25 while
  		// anything below 'a' (digits, upper case, space) maps to a large value.
  		return cx-'a' < cy-'a'
  	}
  	return len(x) < len(y)
  }
  1253  
  // splitOpcode splits an opcode into its fixed and variable portions.
  // For example "05 iw" splits into "05" and "iw".
  //
  // The fixed portion is the longest leading run made of digits, upper-case
  // letters, spaces, '.', '+' and "/x" pairs (a slash plus the byte after
  // it); surrounding white space is trimmed from it. The variable portion
  // is everything after that run, unmodified.
  func splitOpcode(x string) (fixed, variable string) {
  	end := 0
  scan:
  	for end < len(x) {
  		switch c := x[end]; {
  		case c >= '0' && c <= '9', c >= 'A' && c <= 'Z', c == ' ', c == '.', c == '+':
  			end++
  		case c == '/' && end+2 <= len(x):
  			// Skip the slash together with the digit or 'r' that follows it.
  			end += 2
  		default:
  			break scan
  		}
  	}
  	return strings.TrimSpace(x[:end]), x[end:]
  }
  1272  
  // canGenerate reports whether opcode string x can generate opcode string y.
  // For example "D5 ib" can generate "D5 0A".
  // Any string x is not considered to generate itself.
  //
  // x must end in one of the wildcards below, and y must consist of the
  // text of x before the wildcard followed by a value the wildcard allows:
  //
  //	ib   two upper-case hex digits
  //	0+i  one digit in the range 0-7
  //	8+i  one hex digit in the range 8-F
  //
  // The wildcard is matched as a suffix of x rather than by scanning the
  // common prefix of x and y, so that a value that happens to start with
  // the same character as the wildcard is accepted too: "DF C0+i"
  // generates "DF C0" and "D9 C8+i" generates "D9 C8".
  func canGenerate(x, y string) bool {
  	// tail returns the part of y standing in for the wildcard pat,
  	// provided x ends in pat and y starts with the rest of x.
  	tail := func(pat string) (string, bool) {
  		if !strings.HasSuffix(x, pat) {
  			return "", false
  		}
  		base := x[:len(x)-len(pat)]
  		if !strings.HasPrefix(y, base) {
  			return "", false
  		}
  		return y[len(base):], true
  	}

  	if rest, ok := tail("ib"); ok {
  		return len(rest) == 2 && allHex(rest)
  	}
  	if rest, ok := tail("0+i"); ok {
  		return len(rest) == 1 && '0' <= rest[0] && rest[0] <= '7'
  	}
  	if rest, ok := tail("8+i"); ok {
  		return len(rest) == 1 && ('8' <= rest[0] && rest[0] <= '9' || 'A' <= rest[0] && rest[0] <= 'F')
  	}
  	return false
  }

  // allHex reports whether s is entirely hex digits.
  // Only the digits 0-9 and the upper-case letters A-F count;
  // the empty string is reported as all hex.
  func allHex(s string) bool {
  	for _, c := range s {
  		if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' {
  			continue
  		}
  		return false
  	}
  	return true
  }
  1305  
  1306  // is16vs32pair reports whether x and y are the 16- and 32-bit variants of the same instruction,
  1307  // based on analysis of the mnemonic syntax.
  1308  func is16vs32pair(x, y *instruction) bool {
  1309  	return conv16.Replace(x.syntax) == y.syntax ||
  1310  		strings.Replace(x.syntax, "r16, r/", "r32, r32/", -1) == y.syntax || // LSL etc
  1311  		strings.Replace(x.syntax, "r16", "r32", 1) == y.syntax // MOVSXD, MOVSX, etc
  1312  }
  1313  
  // containsAll reports whether x contains every string in targ, in the
  // given order and without overlap: each target is searched for only in
  // the text that follows the match of the previous one.
  // With no targets the result is true.
  func containsAll(x string, targ ...string) bool {
  	rest := x
  	for k := 0; k < len(targ); k++ {
  		at := strings.Index(rest, targ[k])
  		if at == -1 {
  			return false
  		}
  		rest = rest[at+len(targ[k]):]
  	}
  	return true
  }
  1324  

View as plain text