// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package main import ( "fmt" "os" "sort" "strings" ) // Clean up the data from the Intel manual for correctness // and to annotate details relevant to decoding or encoding, // such as whether an instruction is valid only in certain // operand size modes. // encodeReplace maps (argument, encoding) pairs to the corrected argument. // We use a suffix 1 for the register and 2 for the r/m in the modrm byte. // We use a suffix V for a register number specified in the VEX.vvvv bits. var encodeReplace = map[[2]string]string{ {"mm", "ModRM:reg"}: "mm1", {"mm", "ModRM:r/m"}: "mm2", {"mm1", "ModRM:r/m"}: "mm2", {"mm2", "ModRM:reg"}: "mm1", {"mm/m32", "ModRM:r/m"}: "mm2/m32", {"mm/m64", "ModRM:r/m"}: "mm2/m64", {"xmm", "ModRM:reg"}: "xmm1", {"xmm", "ModRM:r/m"}: "xmm2", {"xmm/m64", "ModRM:r/m"}: "xmm2/m64", {"xmm0", "ModRM:reg"}: "xmm1", {"xmm1", "ModRM:r/m"}: "xmm2", {"xmm1/m16", "ModRM:r/m"}: "xmm2/m16", {"xmm1/m32", "ModRM:r/m"}: "xmm2/m32", {"xmm1/m64", "ModRM:r/m"}: "xmm2/m64", {"xmm1/m128", "ModRM:r/m"}: "xmm2/m128", {"xmm1/m256", "ModRM:r/m"}: "xmm2/m256", {"xmm/m16", "ModRM:r/m"}: "xmm2/m16", {"xmm/m32", "ModRM:r/m"}: "xmm2/m32", {"xmm/m64", "ModRM:r/m"}: "xmm2/m64", {"xmm/m128", "ModRM:r/m"}: "xmm2/m128", {"xmm/m256", "ModRM:r/m"}: "xmm2/m256", {"xmm3", "ModRM:reg"}: "xmm1", {"xmm3", "ModRM:r/m"}: "xmm2", {"xmm3/m16", "ModRM:r/m"}: "xmm2/m16", {"xmm3/m32", "ModRM:r/m"}: "xmm2/m32", {"xmm3/m64", "ModRM:r/m"}: "xmm2/m64", {"xmm3/m128", "ModRM:r/m"}: "xmm2/m128", {"xmm3/m256", "ModRM:r/m"}: "xmm2/m256", {"xmm2", "ModRM:reg"}: "xmm1", {"xmm2/m16", "ModRM:reg"}: "xmm1/m16", {"xmm2/m32", "ModRM:reg"}: "xmm1/m32", {"xmm2/m64", "ModRM:reg"}: "xmm1/m64", {"xmm2/m128", "ModRM:reg"}: "xmm1/m128", {"xmm2/m256", "ModRM:reg"}: "xmm1/m256", {"ymm", "ModRM:reg"}: "ymm1", {"ymm", "ModRM:r/m"}: "ymm2", {"ymm0", "ModRM:reg"}: 
"ymm1", {"ymm1", "ModRM:r/m"}: "ymm2", {"ymm1/m16", "ModRM:r/m"}: "ymm2/m16", {"ymm1/m32", "ModRM:r/m"}: "ymm2/m32", {"ymm1/m64", "ModRM:r/m"}: "ymm2/m64", {"ymm1/m128", "ModRM:r/m"}: "ymm2/m128", {"ymm1/m256", "ModRM:r/m"}: "ymm2/m256", {"ymm3", "ModRM:reg"}: "ymm1", {"ymm3", "ModRM:r/m"}: "ymm2", {"ymm3/m16", "ModRM:r/m"}: "ymm2/m16", {"ymm3/m32", "ModRM:r/m"}: "ymm2/m32", {"ymm3/m64", "ModRM:r/m"}: "ymm2/m64", {"ymm3/m128", "ModRM:r/m"}: "ymm2/m128", {"ymm3/m256", "ModRM:r/m"}: "ymm2/m256", {"ymm2", "ModRM:reg"}: "ymm1", {"ymm2/m16", "ModRM:reg"}: "ymm1/m16", {"ymm2/m32", "ModRM:reg"}: "ymm1/m32", {"ymm2/m64", "ModRM:reg"}: "ymm1/m64", {"ymm2/m128", "ModRM:reg"}: "ymm1/m128", {"ymm2/m256", "ModRM:reg"}: "ymm1/m256", {"xmm1", "VEX.vvvv"}: "xmmV", {"xmm2", "VEX.vvvv"}: "xmmV", {"ymm1", "VEX.vvvv"}: "ymmV", {"ymm2", "VEX.vvvv"}: "ymmV", {"xmm4", "imm8[7:4]"}: "xmmIH", {"ymm4", "imm8[7:4]"}: "ymmIH", {"r8", "opcode + rd"}: "r8op", {"r16", "opcode + rd"}: "r16op", {"r32", "opcode + rd"}: "r32op", {"r64", "opcode + rd"}: "r64op", {"reg/m32", "ModRM:r/m"}: "r/m32", {"reg/m16", "ModRM:r/m"}: "r32/m16", {"bnd", "ModRM:reg"}: "bnd1", {"bnd2", "ModRM:reg"}: "bnd1", {"bnd1/m64", "ModRM:r/m"}: "bnd2/m64", {"bnd1/m128", "ModRM:r/m"}: "bnd2/m128", {"r32a", "ModRM:reg"}: "r32", {"r64a", "ModRM:reg"}: "r64", {"r32", "VEX.vvvv"}: "r32V", {"r64", "VEX.vvvv"}: "r64V", {"r32b", "VEX.vvvv"}: "r32V", {"r64b", "VEX.vvvv"}: "r64V", {"r64", "VEX.vvvv"}: "r64V", {"ST", "ST(0)"}: "ST(0)", } // A few instructions do not have the usual encoding descriptions. // Supply them. 
// The map is keyed by instruction syntax; each value lists the
// encoding detail for each operand, in operand order.
var encodings = map[string][]string{
	"FADD m32fp": {"ModRM:r/m (r)"},
	"FADD m64fp": {"ModRM:r/m (r)"},
	"FADD ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FADD ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FADDP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FIADD m32int": {"ModRM:r/m (r)"},
	"FIADD m16int": {"ModRM:r/m (r)"},
	"FBLD m80dec": {"ModRM:r/m (r)"},
	"FBSTP m80bcd": {"ModRM:r/m (w)"},
	"FCMOVB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVU ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNU ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCOM m32fp": {"ModRM:r/m (r)"},
	"FCOM m64fp": {"ModRM:r/m (r)"},
	"FCOM ST(i)": {"ST(i) (r)"},
	"FCOMP m32fp": {"ModRM:r/m (r)"},
	"FCOMP m64fp": {"ModRM:r/m (r)"},
	"FCOMP ST(i)": {"ST(i) (r)"},
	"FCOMI ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
	"FCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
	"FUCOMI ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
	"FUCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},
	"FDIV m32fp": {"ModRM:r/m (r)"},
	"FDIV m64fp": {"ModRM:r/m (r)"},
	"FDIV ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FDIV ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FDIVP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FIDIV m16int": {"ModRM:r/m (r)"},
	"FIDIV m32int": {"ModRM:r/m (r)"},
	"FIDIV m64int": {"ModRM:r/m (r)"},
	"FDIVR m32fp": {"ModRM:r/m (r)"},
	"FDIVR m64fp": {"ModRM:r/m (r)"},
	"FDIVR ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FDIVR ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FDIVRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FIDIVR m16int": {"ModRM:r/m (r)"},
	"FIDIVR m32int": {"ModRM:r/m (r)"},
	"FIDIVR m64int": {"ModRM:r/m (r)"},
	"FFREE ST(i)": {"ST(i) (w)"},
	"FICOM m16int": {"ModRM:r/m (r)"},
	"FICOM m32int": {"ModRM:r/m (r)"},
	"FICOMP m16int": {"ModRM:r/m (r)"},
	"FICOMP m32int": {"ModRM:r/m (r)"},
	"FILD m16int": {"ModRM:r/m (r)"},
	"FILD m32int": {"ModRM:r/m (r)"},
	"FILD m64int": {"ModRM:r/m (r)"},
	"FIST m16int": {"ModRM:r/m (w)"},
	"FIST m32int": {"ModRM:r/m (w)"},
	"FISTP m16int": {"ModRM:r/m (w)"},
	"FISTP m32int": {"ModRM:r/m (w)"},
	"FISTP m64int": {"ModRM:r/m (w)"},
	"FISTTP m16int": {"ModRM:r/m (w)"},
	"FISTTP m32int": {"ModRM:r/m (w)"},
	"FISTTP m64int": {"ModRM:r/m (w)"},
	"FLD m32fp": {"ModRM:r/m (r)"},
	"FLD m64fp": {"ModRM:r/m (r)"},
	"FLD m80fp": {"ModRM:r/m (r)"},
	"FLD ST(i)": {"ST(i) (r)"},
	"FLDCW m2byte": {"ModRM:r/m (r)"},
	"FLDENV m14/28byte": {"ModRM:r/m (r)"},
	"FMUL m32fp": {"ModRM:r/m (r)"},
	"FMUL m64fp": {"ModRM:r/m (r)"},
	"FMUL ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FMUL ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FMULP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FIMUL m16int": {"ModRM:r/m (r)"},
	"FIMUL m32int": {"ModRM:r/m (r)"},
	"FRSTOR m94/108byte": {"ModRM:r/m (r)"},
	"FSAVE m94/108byte": {"ModRM:r/m (w)"},
	"FNSAVE m94/108byte": {"ModRM:r/m (w)"},
	"FST m32fp": {"ModRM:r/m (w)"},
	"FST m64fp": {"ModRM:r/m (w)"},
	"FST m80fp": {"ModRM:r/m (w)"},
	"FST ST(i)": {"ST(i) (w)"},
	"FSTP m32fp": {"ModRM:r/m (w)"},
	"FSTP m64fp": {"ModRM:r/m (w)"},
	"FSTP m80fp": {"ModRM:r/m (w)"},
	"FSTP ST(i)": {"ST(i) (w)"},
	"FSTCW m2byte": {"ModRM:r/m (w)"},
	"FNSTCW m2byte": {"ModRM:r/m (w)"},
	"FSTENV m14/28byte": {"ModRM:r/m (w)"},
	"FNSTENV m14/28byte": {"ModRM:r/m (w)"},
	"FSTSW m2byte": {"ModRM:r/m (w)"},
	"FSTSW AX": {"AX (w)"},
	"FNSTSW m2byte": {"ModRM:r/m (w)"},
	"FNSTSW AX": {"AX (w)"},
	"FSUB m32fp": {"ModRM:r/m (r)"},
	"FSUB m64fp": {"ModRM:r/m (r)"},
	"FSUB ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FSUB ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FSUBP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FISUB m16int": {"ModRM:r/m (r)"},
	"FISUB m32int": {"ModRM:r/m (r)"},
	"FSUBR m32fp": {"ModRM:r/m (r)"},
	"FSUBR m64fp": {"ModRM:r/m (r)"},
	"FSUBR ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FSUBR ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FSUBRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FISUBR m16int": {"ModRM:r/m (r)"},
	"FISUBR m32int": {"ModRM:r/m (r)"},
	"FISUBR m64int": {"ModRM:r/m (r)"},
	"FUCOM ST(i)": {"ST(i) (r)"},
	"FUCOMP ST(i)": {"ST(i) (r)"},
	"FXCH ST(i)": {"ST(i) (r, w)"},
	"POP DS": {"DS (w)"},
	"POP ES": {"ES (w)"},
	"POP FS": {"FS (w)"},
	"POP GS": {"GS (w)"},
	"POP SS": {"SS (w)"},
	"POP CS": {"CS (w)"},
	"PUSH CS": {"CS (r)"},
	"PUSH DS": {"DS (r)"},
	"PUSH ES": {"ES (r)"},
	"PUSH FS": {"FS (r)"},
	"PUSH GS": {"GS (r)"},
	"PUSH SS": {"SS (r)"},
	"INT 3": {"3 (r)"},

	// In manual but hard to parse
	"BNDLDX bnd, mib": {"ModRM:reg (w)", "ModRM:r/m (r)"},
	"BNDSTX mib, bnd": {"ModRM:r/m (r)", "ModRM:reg (r)"},

	// In manual but wrong
	"CALL rel16": {"Offset"},
	"CALL rel32": {"Offset"},
	"IN AL, imm8": {"AL (w)", "imm8 (r)"},
	"IN AX, imm8": {"AX (w)", "imm8 (r)"},
	"IN EAX, imm8": {"EAX (w)", "imm8 (r)"},
	"IN AL, DX": {"AL (w)", "DX (r)"},
	"IN AX, DX": {"AX (w)", "DX (r)"},
	"IN EAX, DX": {"EAX (w)", "DX (r)"},
	"OUT DX, AL": {"DX (r)", "AL (r)"},
	"OUT DX, AX": {"DX (r)", "AX (r)"},
	"OUT DX, EAX": {"DX (r)", "EAX (r)"},
	"OUT imm8, AL": {"imm8 (r)", "AL (r)"},
	"OUT imm8, AX": {"imm8 (r)", "AX (r)"},
	"OUT imm8, EAX": {"imm8 (r)", "EAX (r)"},
	"XCHG AX, r16": {"AX (r, w)", "opcode + rd (r, w)"},
	"XCHG EAX, r32": {"EAX (r, w)", "opcode + rd (r, w)"},
	"XCHG RAX, r64": {"RAX (r, w)", "opcode + rd (r, w)"},

	// Encoding not listed.
	"INVEPT r32, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVEPT r64, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVVPID r32, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVVPID r64, m128": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMREAD r/m32, r32": {"ModRM:r/m (w)", "ModRM:reg (r)"},
	"VMREAD r/m64, r64": {"ModRM:r/m (w)", "ModRM:reg (r)"},
	"VMWRITE r32, r/m32": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMWRITE r64, r/m64": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMCLEAR m64": {"ModRM:r/m (w)"},
	"VMPTRLD m64": {"ModRM:r/m (r)"},
	"VMPTRST m64": {"ModRM:r/m (w)"},
	"VMXON m64": {"ModRM:r/m (r)"},
}

// opAction lists the read/write actions for individual opcodes,
// where the manual does not.
// Each value holds "r", "w" or "rw" strings; the entries here use one
// string per operand of the instruction, destination first.
var opAction = map[string][]string{
	"ADC": {"rw", "r"},
	"ADD": {"rw", "r"},
	"AND": {"rw", "r"},
	"BLENDVPD": {"rw", "r", "r"},
	"BLENDVPS": {"rw", "r", "r"},
	"IN": {"w", "r"},
	"MOV": {"w", "r"},
	"OR": {"rw", "r"},
	"OUT": {"r", "r"},
	"PBLENDVB": {"rw", "r", "r"},
	"RCL": {"rw", "r"},
	"RCR": {"rw", "r"},
	"ROL": {"rw", "r"},
	"ROR": {"rw", "r"},
	"SAL": {"rw", "r"},
	"SAR": {"rw", "r"},
	"SBB": {"rw", "r"},
	"SHL": {"rw", "r"},
	"SHLD": {"rw", "r", "r"},
	"SHR": {"rw", "r"},
	"SHRD": {"rw", "r", "r"},
	// SUB takes two operands, like ADD and SBB.
	"SUB": {"rw", "r"},
	"TEST": {"r", "r"},
	"VBLENDVPD": {"rw", "r", "r"},
	"VBLENDVPS": {"rw", "r", "r"},
	"VPBLENDVB": {"rw", "r", "r"},
	"VPMASKMOVD": {"w", "r", "r"},
	"VPMASKMOVQ": {"w", "r", "r"},
	"VPSLLVD": {"w", "r", "r"},
	"VPSRAVD": {"w", "r", "r"},
	"VPSRLVD": {"w", "r", "r"},
	"VPSRLVQ": {"w", "r", "r"},
	"VINSERTI128": {"w", "r", "r"},
	"VPBLENDD": {"w", "r", "r"},
	"VPERMD": {"w", "r", "r"},
	"VPERMPS": {"w", "r", "r"},
	"VPERM2I128": {"w", "r", "r"},
	"VPSLLVQ": {"w", "r", "r"},
	"XCHG": {"rw", "rw"},
	"XOR": {"rw", "r"},
}

// encodeOK lists valid arg, encoding pairs.
// Any pair not listed gets a warning.
// Keys are {argument, encoding}. Each pair is listed exactly once:
// keys are arrays, so the compiler would not report a repeated entry.
var encodeOK = map[[2]string]bool{
	{"0", "imm8"}: true,
	{"1", "1"}: true,
	{"1", "imm8"}: true,
	{"", ""}: true,
	{"", "implicit XMM0"}: true,
	{"AL", "AL"}: true,
	{"AL", "AL/AX/EAX/RAX"}: true,
	{"AX", "AL/AX/EAX/RAX"}: true,
	{"AX", "AX"}: true,
	{"AX", "AX/EAX/RAX"}: true,
	{"CL", "CL"}: true,
	{"CR0-CR7", "ModRM:reg"}: true,
	{"CR8", ""}: true,
	{"CS", "CS"}: true,
	{"DR0-DR7", "ModRM:reg"}: true,
	{"DS", "DS"}: true,
	{"DX", "DX"}: true,
	{"EAX", "AL/AX/EAX/RAX"}: true,
	{"EAX", "AX/EAX/RAX"}: true,
	{"EAX", "EAX"}: true,
	{"ES", "ES"}: true,
	{"FS", "FS"}: true,
	{"GS", "GS"}: true,
	{"RAX", "AL/AX/EAX/RAX"}: true,
	{"RAX", "AX/EAX/RAX"}: true,
	{"RAX", "RAX"}: true,
	{"ST", "ST(0)"}: true,
	{"ST(0)", "ST(0)"}: true,
	{"ST(i)", "ST(i)"}: true,
	{"Sreg", "ModRM:reg"}: true,
	{"bnd1", "ModRM:reg"}: true,
	{"bnd2/m128", "ModRM:r/m"}: true,
	{"bnd2/m64", "ModRM:r/m"}: true,
	{"imm16", "imm16"}: true,
	{"imm16", "imm8"}: true,
	{"imm16", "imm8/16/32"}: true,
	{"imm16", "imm8/16/32/64"}: true,
	{"imm16", "iw"}: true,
	{"imm32", "imm8"}: true,
	{"imm32", "imm8/16/32"}: true,
	{"imm32", "imm8/16/32/64"}: true,
	{"imm64", "imm8/16/32/64"}: true,
	{"imm8", "imm8"}: true,
	{"imm8", "imm8/16/32"}: true,
	{"imm8", "imm8/16/32/64"}: true,
	{"imm8", "imm8[3:0]"}: true,
	{"m", "ModRM:r/m"}: true,
	{"m128", "ModRM:r/m"}: true,
	{"m14/28byte", "ModRM:r/m"}: true,
	{"m16", "ModRM:r/m"}: true,
	{"m16&16", "ModRM:r/m"}: true,
	{"m16&32", "ModRM:r/m"}: true,
	{"m16&64", "ModRM:r/m"}: true,
	{"m16:16", "ModRM:r/m"}: true,
	{"m16:16", "Offset"}: true,
	{"m16:32", "ModRM:r/m"}: true,
	{"m16:32", "Offset"}: true,
	{"m16:64", "ModRM:r/m"}: true,
	{"m16:64", "Offset"}: true,
	{"m16int", "ModRM:r/m"}: true,
	{"m256", "ModRM:r/m"}: true,
	{"m2byte", "ModRM:r/m"}: true,
	{"m32", "ModRM:r/m"}: true,
	{"m32&32", "ModRM:r/m"}: true,
	{"m32fp", "ModRM:r/m"}: true,
	{"m32int", "ModRM:r/m"}: true,
	{"m512byte", "ModRM:r/m"}: true,
	{"m64", "ModRM:r/m"}: true,
	{"m64fp", "ModRM:r/m"}: true,
	{"m64int", "ModRM:r/m"}: true,
	{"m8", "ModRM:r/m"}: true,
	{"m80bcd", "ModRM:r/m"}: true,
	{"m80dec", "ModRM:r/m"}: true,
	{"m80fp", "ModRM:r/m"}: true,
	{"m94/108byte", "ModRM:r/m"}: true,
	{"mem", "ModRM:r/m"}: true,
	{"mib", "ModRM:r/m"}: true,
	{"mm/m32", "ModRM:r/m"}: true,
	{"mm1", "ModRM:reg"}: true,
	{"mm2", "ModRM:r/m"}: true,
	{"mm2/m32", "ModRM:r/m"}: true,
	{"mm2/m64", "ModRM:r/m"}: true,
	{"moffs16", "Moffs"}: true,
	{"moffs32", "Moffs"}: true,
	{"moffs64", "Moffs"}: true,
	{"moffs8", "Moffs"}: true,
	{"ptr16:16", "Offset"}: true,
	{"ptr16:32", "Offset"}: true,
	{"r/m16", "ModRM:r/m"}: true,
	{"r/m32", "ModRM:r/m"}: true,
	{"r/m64", "ModRM:r/m"}: true,
	{"r/m8", "ModRM:r/m"}: true,
	{"r16", "ModRM:reg"}: true,
	{"r16op", "opcode + rd"}: true,
	{"r32", "ModRM:reg"}: true,
	{"r32", "VEX.vvvv"}: true,
	{"r32/m16", "ModRM:r/m"}: true,
	{"r32/m8", "ModRM:r/m"}: true,
	{"r32V", "VEX.vvvv"}: true,
	{"r32op", "opcode + rd"}: true,
	{"r64", "ModRM:reg"}: true,
	{"r64/m16", "ModRM:r/m"}: true,
	{"r64V", "VEX.vvvv"}: true,
	{"r64op", "opcode + rd"}: true,
	{"r8", "ModRM:reg"}: true,
	{"r8op", "opcode + rd"}: true,
	{"rel16", "Offset"}: true,
	{"rel32", "Offset"}: true,
	{"rel8", "Offset"}: true,
	{"rmr16", "ModRM:r/m"}: true,
	{"rmr32", "ModRM:r/m"}: true,
	{"rmr64", "ModRM:r/m"}: true,
	{"xmm/m128", "ModRM:r/m"}: true,
	{"xmm/m32", "ModRM:r/m"}: true,
	{"xmm1", "ModRM:reg"}: true,
	{"xmm2", "ModRM:r/m"}: true,
	{"xmm2/m128", "ModRM:r/m"}: true,
	{"xmm2/m16", "ModRM:r/m"}: true,
	{"xmm2/m32", "ModRM:r/m"}: true,
	{"xmm2/m64", "ModRM:r/m"}: true,
	{"xmm2/m8", "ModRM:r/m"}: true,
	{"xmmIH", "imm8[7:4]"}: true,
	{"xmmV", "VEX.vvvv"}: true,
	{"ymm1", "ModRM:reg"}: true,
	{"ymm2", "ModRM:r/m"}: true,
	{"ymm2/m256", "ModRM:r/m"}: true,
	{"ymmIH", "imm8[7:4]"}: true,
	{"ymmV", "VEX.vvvv"}: true,
	{"vm32x", "vsib"}: true,
	{"vm64x", "vsib"}: true,
	{"vm32y", "vsib"}: true,
	{"vm64y", "vsib"}: true,
	{"SS", "SS"}: true,
	{"3", "3"}: true,
}

// instBlacklist lists the instruction syntaxes to ignore when parsing.
// We exclude Intel's general forms for these not-actually-general instructions.
// The syntax makes it look like arbitrary memory operands can be used when in fact
// the exact address is fixed in all cases - [DI] or [SI], for example.
// Entries are grouped by string instruction, plain form first and then
// the REP/REPE/REPNE-prefixed forms.
var instBlacklist = map[string]bool{
	// CMPS
	"CMPS m8, m8": true,
	"CMPS m16, m16": true,
	"CMPS m32, m32": true,
	"CMPS m64, m64": true,
	"REPE CMPS m8, m8": true,
	"REPE CMPS m16, m16": true,
	"REPE CMPS m32, m32": true,
	"REPE CMPS m64, m64": true,
	"REPNE CMPS m8, m8": true,
	"REPNE CMPS m16, m16": true,
	"REPNE CMPS m32, m32": true,
	"REPNE CMPS m64, m64": true,

	// INS
	"INS m8, DX": true,
	"INS m16, DX": true,
	"INS m32, DX": true,
	"REP INS m8, DX": true,
	"REP INS m16, DX": true,
	"REP INS m32, DX": true,
	"REP INS r/m32, DX": true,

	// LODS
	"LODS m8": true,
	"LODS m16": true,
	"LODS m32": true,
	"LODS m64": true,
	"REP LODS AL": true,
	"REP LODS AX": true,
	"REP LODS EAX": true,
	"REP LODS RAX": true,

	// MOVS
	"MOVS m8, m8": true,
	"MOVS m16, m16": true,
	"MOVS m32, m32": true,
	"MOVS m64, m64": true,
	"REP MOVS m8, m8": true,
	"REP MOVS m16, m16": true,
	"REP MOVS m32, m32": true,
	"REP MOVS m64, m64": true,

	// OUTS
	"OUTS DX, m8": true,
	"OUTS DX, m16": true,
	"OUTS DX, m32": true,
	"REP OUTS DX, m8": true,
	"REP OUTS DX, m16": true,
	"REP OUTS DX, m32": true,
	"REP OUTS DX, r/m8": true,
	"REP OUTS DX, r/m16": true,
	"REP OUTS DX, r/m32": true,

	// SCAS
	"SCAS m8": true,
	"SCAS m16": true,
	"SCAS m32": true,
	"SCAS m64": true,
	"REPE SCAS m8": true,
	"REPE SCAS m16": true,
	"REPE SCAS m32": true,
	"REPE SCAS m64": true,
	"REPNE SCAS m8": true,
	"REPNE SCAS m16": true,
	"REPNE SCAS m32": true,
	"REPNE SCAS m64": true,

	// STOS
	"STOS m8": true,
	"STOS m16": true,
	"STOS m32": true,
	"STOS m64": true,
	"REP STOS m8": true,
	"REP STOS m16": true,
	"REP STOS m32": true,
	"REP STOS m64": true,

	// XLAT
	"XLAT m8": true,
}

// condPrefs lists preferences for condition code suffixes.
// The first suffix in each pair takes priority over the second.
var condPrefs = [][2]string{
	{"B", "C"},
	{"B", "NAE"},
	{"AE", "NB"},
	{"AE", "NC"},
	{"E", "Z"},
	{"NE", "NZ"},
	{"BE", "NA"},
	{"A", "NBE"},
	{"P", "PE"},
	{"NP", "PO"},
	{"L", "NGE"},
	{"GE", "NL"},
	{"LE", "NG"},
	{"G", "NLE"},
}

// conv16 specifies replacements to turn a 16-bit syntax into a 32-bit syntax.
// If the conv16 can be applied to one form to create a new form with the same
// fixed instruction prefix, the pair is tagged as operand16 and operand32
// respectively.
// Arguments are (old, new) pairs, one pair per line.
var conv16 = strings.NewReplacer(
	"16:16", "16:32",
	"16", "32",
	"AX", "EAX",
	"CBW", "CWDE",
	"CMPSW", "CMPSD",
	"CWD", "CDQ",
	"INSW", "INSD",
	"IRET", "IRETD",
	"LODSW", "LODSD",
	"MOVSW", "MOVSD",
	"OUTSW", "OUTSD",
	"POPA", "POPAD",
	"POPF", "POPFD",
	"PUSHA", "PUSHAD",
	"PUSHF", "PUSHFD",
	"SCASW", "SCASD",
	"STOSW", "STOSD",
)

// fixup records additional modifications needed that are not derived
// from the instructions in the manual. It is keyed by the syntax and opcode.
// Each value is the list of fixers for the matching instruction.
var fixup = map[[2]string][]fixer{
	// NOP is a very special case overloading XCHG AX, AX.
	// The decoder handles it in custom code; exclude from the usual tables.
	{"NOP", "90"}: {fixAddTag("pseudo")},

	// PAUSE is a special case of NOP.
	{"PAUSE", "F3 90"}: {fixAddTag("pseudo")}, // used to add 'keepop' tag but not sure what that means

	// Far CALL, JMP, RET are renamed with a _FAR suffix for disambiguation.
	{"CALL m16:16", "FF /3"}: {fixRename("CALL_FAR")},
	{"CALL m16:32", "FF /3"}: {fixRename("CALL_FAR")},
	{"CALL m16:64", "REX.W FF /3"}: {fixRename("CALL_FAR")},
	{"CALL ptr16:16", "9A cd"}: {fixRename("CALL_FAR")},
	{"CALL ptr16:32", "9A cp"}: {fixRename("CALL_FAR")},
	{"JMP m16:16", "FF /5"}: {fixRename("JMP_FAR")},
	{"JMP m16:32", "FF /5"}: {fixRename("JMP_FAR")},
	{"JMP m16:64", "REX.W FF /5"}: {fixRename("JMP_FAR")},
	{"JMP ptr16:16", "EA cd"}: {fixRename("JMP_FAR")},
	{"JMP ptr16:32", "EA cp"}: {fixRename("JMP_FAR")},
	{"RET imm16", "CA iw"}: {fixRename("RET_FAR"), fixArg(0, "imm16u")},
	{"RET", "CB"}: {fixRename("RET_FAR")},

	// Unsigned immediates. (RET far imm16 handled above.)
	// Some of these are just preferences for disassembling.
	{"ENTER imm16, imm8", "C8 iw ib"}: {fixArg(1, "imm8b")},
	{"RET imm16", "C2 iw"}: {fixArg(0, "imm16u")},
	{"IN AL, imm8", "E4 ib"}: {fixArg(1, "imm8u")},
	{"IN AX, imm8", "E5 ib"}: {fixArg(1, "imm8u")},
	{"IN EAX, imm8", "E5 ib"}: {fixArg(1, "imm8u"), fixAddTag("operand64")},
	{"OUT imm8, AL", "E6 ib"}: {fixArg(0, "imm8u")},
	{"OUT imm8, AX", "E7 ib"}: {fixArg(0, "imm8u")},
	{"OUT imm8, EAX", "E7 ib"}: {fixArg(0, "imm8u"), fixAddTag("operand64")},
	{"MOV r8op, imm8", "B0+rb ib"}: {fixArg(1, "imm8u")},
	{"MOV r8op, imm8", "REX B0+rb ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
	{"MOV r/m8, imm8", "C6 /0 ib"}: {fixArg(1, "imm8u")},
	{"MOV r/m8, imm8", "REX C6 /0 ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},

	// The listings for MOVSX and MOVSXD do not list some variants that
	// assemblers seem to allow.
	// As a result, this instruction got the wrong tag.
	// The other instructions are listed in extraInsts.
	{"MOVSX r32, r/m16", "0F BF /r"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
	{"MOVZX r32, r/m16", "0F B7 /r"}: {fixRemoveTag("operand16")},

	// Listings are incomplete or incorrect. Fix tags to adjust for new instructions below.
	{"SLDT r/m16", "0F 00 /0"}: {fixRemoveTag("operand32")},
	{"STR r/m16", "0F 00 /1"}: {fixAddTag("operand16")},
	{"BSWAP r32op", "0F C8+rd"}: {fixRemoveTag("operand16")},
	{"MOV Sreg, r/m16", "8E /r"}: {fixRemoveTag("operand32")},
	{"MOV Sreg, r/m64", "REX.W 8E /r"}: {fixArg(1, "r/m16")},
	{"MOV r/m64, Sreg", "REX.W 8C /r"}: {fixArg(0, "r/m16")},
	{"MOV r/m16, Sreg", "8C /r"}: {fixRemoveTag("operand32")},
	{"MOV r/m64, imm32", "REX.W C7 /0 io"}: {fixOpcode("REX.W C7 /0 id")},

	// On 64-bit, these ignore 64-bit mode change.
	{"POP FS", "0F A1"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
	{"POP GS", "0F A9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
	{"LEAVE", "C9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
	{"IN EAX, DX", "ED"}: {fixAddTag("operand64")},
	{"INSD", "6D"}: {fixAddTag("operand64")},
	{"OUT DX, EAX", "EF"}: {fixAddTag("operand64")},
	{"OUTSD", "6F"}: {fixAddTag("operand64")},
	{"XBEGIN rel32", "C7 F8 cd"}: {fixAddTag("operand64")},

	// Treat FWAIT, not WAIT, as canonical.
	{"FWAIT", "9B"}: {fixRemoveTag("pseudo")},
	{"WAIT", "9B"}: {fixAddTag("pseudo")},

	// LAHF and SAHF are listed as "Invalid*" for 64-bit mode.
	// They are actually defined, so Valid from our point of view.
	// It's just that only a very few 64-bit processors allowed them.
	{"LAHF", "9F"}: {fixValid("V", "V")},
	{"SAHF", "9E"}: {fixValid("V", "V")},

	// The JZ forms are listed twice in the table, which confuses things.
	{"JZ rel16", "0F 84 cw"}: {fixAddTag("operand16"), fixRemoveTag("operand32")},
	{"JZ rel32", "0F 84 cd"}: {fixAddTag("operand32"), fixRemoveTag("operand16")},

	// XCHG has two of every instruction, which makes things bad.
	// The XX hack below takes care of most problems but this one remains.
	{"XCHG r/m16, r16", "87 /r"}: {fixRemoveTag("pseudo")},

	// MOV CR8 is just the obvious extension of the MOV CR0-CR7 form.
	{"MOV rmr64, CR8", "REX.R + 0F 20 /0"}: {fixAddTag("pseudo")},
	{"MOV CR8, rmr64", "REX.R + 0F 22 /0"}: {fixAddTag("pseudo")},

	// TODO: EXPLAIN ALL THESE
	{"ADCX r32, r/m32", "66 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
	{"ADOX r32, r/m32", "F3 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
	{"POPFQ", "9D"}: {fixAddTag("operand32"), fixAddTag("operand64")},
	{"PUSHFQ", "9C"}: {fixAddTag("operand32"), fixAddTag("operand64")},
	{"JCXZ rel8", "E3 cb"}: {fixAddTag("address16")},
	{"JECXZ rel8", "E3 cb"}: {fixAddTag("address32")},
	{"JRCXZ rel8", "E3 cb"}: {fixAddTag("address64")},
	{"PUSH r64op", "50+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")},
	{"PUSH r/m64", "FF /6"}: {fixAddTag("operand32"), fixAddTag("operand64")},
	{"POP r64op", "58+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")},
	{"POP r/m64", "8F /0"}: {fixAddTag("operand32"), fixAddTag("operand64")},
	{"SMSW r/m16", "0F 01 /4"}: {fixAddTag("operand16")},
	{"SMSW r32/m16", "0F 01 /4"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},

	// Express to the decoder that the rel16 only applies in 16-bit operand mode.
	{"JA rel16", "0F 87 cw"}: {fixAddTag("operand16")},
	{"JAE rel16", "0F 83 cw"}: {fixAddTag("operand16")},
	{"JB rel16", "0F 82 cw"}: {fixAddTag("operand16")},
	{"JBE rel16", "0F 86 cw"}: {fixAddTag("operand16")},
	{"JE rel16", "0F 84 cw"}: {fixAddTag("operand16")},
	{"JG rel16", "0F 8F cw"}: {fixAddTag("operand16")},
	{"JGE rel16", "0F 8D cw"}: {fixAddTag("operand16")},
	{"JL rel16", "0F 8C cw"}: {fixAddTag("operand16")},
	{"JLE rel16", "0F 8E cw"}: {fixAddTag("operand16")},
	{"JNE rel16", "0F 85 cw"}: {fixAddTag("operand16")},
	{"JNO rel16", "0F 81 cw"}: {fixAddTag("operand16")},
	{"JNP rel16", "0F 8B cw"}: {fixAddTag("operand16")},
	{"JNS rel16", "0F 89 cw"}: {fixAddTag("operand16")},
	{"JO rel16", "0F 80 cw"}: {fixAddTag("operand16")},
	{"JP rel16", "0F 8A cw"}: {fixAddTag("operand16")},
	{"JS rel16", "0F 88 cw"}: {fixAddTag("operand16")},

	{"JA rel32", "0F 87 cd"}: {fixAddTag("operand32")},
	{"JAE rel32", "0F 83 cd"}: {fixAddTag("operand32")},
	{"JB rel32", "0F 82 cd"}: {fixAddTag("operand32")},
	{"JBE rel32", "0F 86 cd"}: {fixAddTag("operand32")},
	{"JE rel32", "0F 84 cd"}: {fixAddTag("operand32")},
	{"JG rel32", "0F 8F cd"}: {fixAddTag("operand32")},
	{"JGE rel32", "0F 8D cd"}: {fixAddTag("operand32")},
	{"JL rel32", "0F 8C cd"}: {fixAddTag("operand32")},
	{"JLE rel32", "0F 8E cd"}: {fixAddTag("operand32")},
	{"JNE rel32", "0F 85 cd"}: {fixAddTag("operand32")},
	{"JNO rel32", "0F 81 cd"}: {fixAddTag("operand32")},
	{"JNP rel32", "0F 8B cd"}: {fixAddTag("operand32")},
	{"JNS rel32", "0F 89 cd"}: {fixAddTag("operand32")},
	{"JO rel32", "0F 80 cd"}: {fixAddTag("operand32")},
	{"JP rel32", "0F 8A cd"}: {fixAddTag("operand32")},
	{"JS rel32", "0F 88 cd"}: {fixAddTag("operand32")},

	{"LSL r16, r/m16", "0F 03 /r"}: {fixAddTag("operand16")},
}

// extraInsts holds hand-written instruction entries; the comment on each
// group records where the entries came from or why they are needed.
// NOTE(review): the code that consumes this list is not in this part of the file.
var extraInsts = []*instruction{
	// Undocumented.
	{syntax: "ICEBP", opcode: "F1", valid32: "V", valid64: "V"},
	{syntax: "UD1", opcode: "0F B9", valid32: "V", valid64: "V"},
	{syntax: "FFREEP ST(i)", opcode: "DF C0+i", valid32: "V", valid64: "V", action: "w"},

	// Where did these come from? They were in version 0.01 of the csv table.
	{syntax: "MOVNTSD m64, xmm1", opcode: "F2 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
	{syntax: "MOVNTSS m32, xmm1", opcode: "F3 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},

	// These express to the decoder that in 64-bit mode
	// an operand prefix does not affect the size of the relative offset.
	{syntax: "CALL rel32", opcode: "E8 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JMP rel32", opcode: "E9 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JA rel32", opcode: "0F 87 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JAE rel32", opcode: "0F 83 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JB rel32", opcode: "0F 82 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JBE rel32", opcode: "0F 86 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JE rel32", opcode: "0F 84 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JG rel32", opcode: "0F 8F cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JGE rel32", opcode: "0F 8D cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JL rel32", opcode: "0F 8C cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JLE rel32", opcode: "0F 8E cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNE rel32", opcode: "0F 85 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNO rel32", opcode: "0F 81 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNP rel32", opcode: "0F 8B cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNS rel32", opcode: "0F 89 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JO rel32", opcode: "0F 80 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JP rel32", opcode: "0F 8A cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JS rel32", opcode: "0F 88 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},

	// Disassemblers recognize these, but they're not in the manual.
	// Not sure if they really exist.
	// The 16-16 and 32-32 forms don't really make sense since there's nothing to extend.
	{syntax: "MOVSX r16, r/m16", opcode: "0F BF /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
	{syntax: "MOVSXD r16, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
	{syntax: "MOVSXD r32, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
	{syntax: "MOVZX r16, r/m16", opcode: "0F B7 /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
	{syntax: "LAR r64, r/m16", opcode: "REX.W 0F 02 /r", valid32: "N.E.", valid64: "V", action: "w,r"},
	{syntax: "SLDT r32/m16", opcode: "0F 00 /0", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
	{syntax: "STR r32/m16", opcode: "0F 00 /1", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
	{syntax: "STR r64/m16", opcode: "REX.W 0F 00 /1", valid32: "N.E.", valid64: "V", action: "w"},
	{syntax: "BSWAP r16op", opcode: "0F C8+rd", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "rw"},

	// Do these exist?
	// I am not sure where they came from, and xed doesn't recognize them.
	//{syntax: "MOV TR0-TR7, rmr32", opcode: "0F 26 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
	//{syntax: "MOV TR0-TR7, rmr64", opcode: "0F 26 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
	//{syntax: "MOV rmr32, TR0-TR7", opcode: "0F 24 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
	//{syntax: "MOV rmr64, TR0-TR7", opcode: "0F 24 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},

	{syntax: "MOV Sreg, r32/m16", opcode: "8E /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
	{syntax: "MOV r/m32, Sreg", opcode: "8C /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
}

// A fixer applies one modification to an instruction in place.
// The values of the fixup table are lists of fixers.
type fixer func(*instruction)

// fixAddTag returns a fixer that calls addTag with the given tag.
func fixAddTag(tag string) fixer {
	return func(inst *instruction) { addTag(inst, tag) }
}

// fixRemoveTag returns a fixer that calls removeTag with the given tag.
func fixRemoveTag(tag string) fixer {
	return func(inst *instruction) { removeTag(inst, tag) }
}

// fixRename returns a fixer that rebuilds inst.syntax with op in place of
// the current opcode name, keeping the arguments reported by splitSyntax.
func fixRename(op string) fixer {
	return func(inst *instruction) {
		_, args := splitSyntax(inst.syntax)
		inst.syntax = joinSyntax(op, args)
	}
}

// fixArg returns a fixer that replaces argument i (counting from 0) of
// inst.syntax with arg. There is no range check on i, so it must be a
// valid index into the argument list returned by splitSyntax.
func fixArg(i int, arg string) fixer {
	return func(inst *instruction) {
		op, args := splitSyntax(inst.syntax)
		args[i] = arg
		inst.syntax = joinSyntax(op, args)
	}
}

// fixIfValid returns a fixer that applies fix only when the instruction's
// valid32 and valid64 fields equal the given strings exactly.
func fixIfValid(valid32, valid64 string, fix fixer) fixer {
	return func(inst *instruction) {
		if inst.valid32 == valid32 && inst.valid64 == valid64 {
			fix(inst)
		}
	}
}

// fixValid returns a fixer that overwrites the instruction's valid32 and
// valid64 fields.
func fixValid(valid32, valid64 string) fixer {
	return func(inst *instruction) {
		inst.valid32 = valid32
		inst.valid64 = valid64
	}
}

// fixOpcode returns a fixer that overwrites the instruction's opcode field.
func fixOpcode(opcode string) fixer {
	return func(inst *instruction) {
		inst.opcode = opcode
	}
}

func cleanup(insts []*instruction) []*instruction {
	var haveOp map[string]bool
	if onlySomePages {
		haveOp = map[string]bool{}
	}

	// Clean individual instruction encodings and opcode sequences.
sawJZ := map[string]bool{} out := insts[:0] for seq, inst := range insts { inst.seq = seq // There are two copies each of JZ rel16 and JZ rel32. Delete the second. if strings.HasPrefix(inst.syntax, "JZ rel") { if sawJZ[inst.syntax] { continue } sawJZ[inst.syntax] = true } out = append(out, inst) // Intel CMPXCHG16B and CMPXCHG8B have surprise "m64" or " m128" at end of encoding. surprises := []string{ " m64", " m128", } for _, s := range surprises { if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) { inst.opcode = strings.TrimSuffix(inst.opcode, s) } } op, args := splitSyntax(inst.syntax) op = strings.TrimRight(op, "*") inst.syntax = joinSyntax(op, args) // Check argument names in syntax against encoding details. if enc, ok := encodings[inst.syntax]; ok { inst.args = enc } if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" { fixed := make([]string, len(args)) copy(fixed, inst.args) fixed[len(args)-1] = "imm8" inst.args = fixed } else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" { inst.args = []string{} } else if len(args) != len(inst.args) { fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; ")) inst.syntax = joinSyntax(op, args) continue } var action []string for i, arg := range args { arg = strings.TrimSpace(arg) arg = strings.TrimRight(arg, "*") if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") { arg = "r32" + strings.TrimPrefix(arg, "reg") } enc := inst.args[i] enc = strings.TrimSpace(enc) switch { case strings.HasSuffix(enc, " (r))"): enc = strings.TrimSuffix(enc, ")") case strings.HasSuffix(enc, " (R)"): enc = strings.TrimSuffix(enc, " (R)") + " (r)" case strings.HasSuffix(enc, " (W)"): enc = strings.TrimSuffix(enc, " (W)") + " (w)" case strings.HasSuffix(enc, " (r,w)"): enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)" case enc == "Imm8": 
enc = "imm8" case enc == "imm8/26/32": enc = "imm8/16/32" case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index": enc = "vsib (r)" } inst.args[i] = enc switch { case strings.HasSuffix(enc, " (r)"): action = append(action, "r") enc = strings.TrimSuffix(enc, " (r)") case strings.HasSuffix(enc, " (w)"): action = append(action, "w") enc = strings.TrimSuffix(enc, " (w)") case strings.HasSuffix(enc, " (r, w)"): action = append(action, "rw") enc = strings.TrimSuffix(enc, " (r, w)") case strings.HasPrefix(enc, "imm"), enc == "Offset", enc == "iw", arg == "1", arg == "0", arg == "3": action = append(action, "r") case i < len(opAction[op]): action = append(action, opAction[op][i]) default: fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg) action = append(action, "?") } if arg == "mem" && op == "LDDQU" { arg = "m128" } if arg == "reg" && op == "LAR" { arg = "r32" } if actual := encodeReplace[[2]string{arg, enc}]; actual != "" { arg = actual } if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" { addTag(inst, "modrm_regonly") arg = "rmr" + arg[1:] } if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" { addTag(inst, "modrm_regonly") } if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" { addTag(inst, "modrm_memonly") } if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") { arg = "rmr64" addTag(inst, "modrm_regonly") } if arg == "CR8" { enc = "" } if !encodeOK[[2]string{arg, enc}] { fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc) } args[i] = arg // Intel SETcc and others are missing the /r. // But CALL rel16 and CALL rel32 have a bad encoding table so ignore the ModRM there. 
if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" { inst.opcode += " /r" } if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") { // The opcode is taken up with something else. Bug in table. fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode) } // XBEGIN is missing cw cd. if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") { inst.opcode += " cw" } if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") { inst.opcode += " cd" } if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") { inst.opcode += " cm" } inst.action = strings.Join(action, ",") } inst.syntax = joinSyntax(op, args) // The Intel manual lists each XCHG form with arguments in both orders. // While this is technically correct, it confuses lots of the analysis. // Change half of them to start with a fake "XX" byte. if op == "XCHG" && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") { inst.opcode = "XX " + inst.opcode } // Intel manual is not great about disabling REX instructions on 32-bit systems. if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" { inst.valid32 = "N.E." } if inst.valid32 == "V" { switch { case containsAll(inst.compat, "not supported", "earlier than the Intel486"): inst.cpuid = "486" case containsAll(inst.compat, "not supported", "earlier than the Pentium"), containsAll(inst.compat, "were introduced", "with the Pentium"): inst.cpuid = "Pentium" case containsAll(inst.compat, "were introduced", "in the Pentium II"): inst.cpuid = "PentiumII" case containsAll(inst.compat, "were introduced", "in the P6 family"), containsAll(inst.compat, "were introduced in P6 family"): addTag(inst, "P6") } } if onlySomePages { op, _ := splitSyntax(inst.syntax) haveOp[op] = true } } insts = out sort.Sort(byOpcode(insts)) // Detect operand size dependencies. 
var last *instruction for _, inst := range insts { if last != nil { f1, _ := splitOpcode(last.opcode) f2, _ := splitOpcode(inst.opcode) if f1 == f2 { // Conflict: cannot distinguish instructions based on fixed prefix. if is16vs32pair(last, inst) { addTag(last, "operand16") addTag(inst, "operand32") continue } if is16vs32pair(inst, last) { addTag(last, "operand32") addTag(inst, "operand16") last = inst continue } } } last = inst } // Detect pseudo-ops, defined as opcode entries subsumed by more general ones. seen := map[string]*instruction{} for _, inst := range insts { if strings.HasPrefix(inst.opcode, "9B ") { // FWAIT prefix addTag(inst, "pseudo") continue } if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" { addTag(inst, "pseudo") continue } if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") { addTag(inst, "pseudo") continue } if strings.HasPrefix(inst.syntax, "SAL ") { // SHL is canonical addTag(inst, "pseudo") continue } if old := seen[inst.opcode]; old != nil { if condLess(old.syntax, inst.syntax) { addTag(inst, "pseudo") continue } if xchgLess(inst.syntax, old.syntax) { old.tags = append(old.tags, "pseudo") seen[inst.opcode] = inst continue } } seen[inst.opcode] = inst if last != nil && canGenerate(last.opcode, inst.opcode) { addTag(inst, "pseudo") continue } last = inst } for _, inst := range insts { if strings.Contains(inst.opcode, "REX ") { if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax { addTag(inst, "pseudo64") continue } else if old != nil && hasTag(old, "pseudo") { addTag(inst, "pseudo") continue } } if strings.Contains(inst.opcode, "REX.W ") { if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax { addTag(old, "ignoreREXW") addTag(inst, "pseudo") continue } else if old != nil && hasTag(old, "pseudo") { addTag(inst, "pseudo") continue } else if old != 
nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") { // There is a 64-bit form of this instruction. // Mark this one as only valid in the non-64-bit operand modes. addTag(old, "operand16") addTag(old, "operand32") continue } } } // Undo XCHG hack above. for _, inst := range insts { if strings.HasPrefix(inst.opcode, "XX ") { inst.opcode = strings.TrimPrefix(inst.opcode, "XX ") addTag(inst, "pseudo") removeTag(inst, "pseudo64") } } // Last ditch effort. Manual fixes. // Some things are too hard to infer. for _, inst := range insts { for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] { fix(inst) } sort.Strings(inst.tags) } sort.Sort(bySeq(insts)) if onlySomePages { for _, inst := range extraInsts { op, _ := splitSyntax(inst.syntax) if haveOp[op] { insts = append(insts, inst) } } } else { insts = append(insts, extraInsts...) } return insts } func hasTag(inst *instruction, tag string) bool { for _, t := range inst.tags { if t == tag { return true } } return false } func removeTag(inst *instruction, tag string) { if !hasTag(inst, tag) { return } out := inst.tags[:0] for _, t := range inst.tags { if t != tag { out = append(out, t) } } inst.tags = out } func addTag(inst *instruction, tag string) { if !hasTag(inst, tag) { inst.tags = append(inst.tags, tag) } } type byOpcode []*instruction func (x byOpcode) Len() int { return len(x) } func (x byOpcode) Swap(i, j int) { x[i], x[j] = x[j], x[i] } func (x byOpcode) Less(i, j int) bool { if x[i].opcode != x[j].opcode { return opcodeLess(x[i].opcode, x[j].opcode) } if condLess(x[i].syntax, x[j].syntax) { return true } if condLess(x[j].syntax, x[i].syntax) { return false } if x[i].syntax != x[j].syntax { return x[i].syntax < x[j].syntax } return x[i].seq < x[j].seq } type bySeq []*instruction func (x bySeq) Len() int { return len(x) } func (x bySeq) Swap(i, j int) { x[i], x[j] = x[j], x[i] } func (x bySeq) Less(i, j int) bool { return x[i].seq < x[j].seq } type bySyntax []*instruction func (x bySyntax) Len() 
int { return len(x) } func (x bySyntax) Swap(i, j int) { x[i], x[j] = x[j], x[i] } func (x bySyntax) Less(i, j int) bool { if x[i].syntax != x[j].syntax { return x[i].syntax < x[j].syntax } return x[i].opcode < x[j].opcode } // condLess reports whether the conditional instruction syntax // x should be considered less than y. // We sort condition codes we prefer ahead of condition codes we don't, // so that the latter are recorded as the pseudo-operations. func condLess(x, y string) bool { x, _ = splitSyntax(x) y, _ = splitSyntax(y) for _, pref := range condPrefs { if strings.HasSuffix(x, pref[0]) && strings.HasSuffix(y, pref[1]) && strings.TrimSuffix(x, pref[0]) == strings.TrimSuffix(y, pref[1]) { return true } } return false } // xchgLess reports whether the xchg instruction x should be considered less than y. func xchgLess(x, y string) bool { return strings.HasPrefix(x, "XCHG ") && x > y } // opcodeLess reports whether opcode string x should be considered less than y. // We sort wildcard fields like "ib" before literal bytes like "0A". func opcodeLess(x, y string) bool { for i := 0; i < len(x) || i < len(y); i++ { if i >= len(x) { return true } if i >= len(y) { return false } if x[i] != y[i] { // sort word before doubleword if x[i] == 'w' && y[i] == 'd' { return true } if x[i] == 'd' && y[i] == 'w' { return false } // Sort lower-case before non-lower-case. // This sorts "ib" before literal bytes like "0A", for example. return x[i]-'a' < y[i]-'a' } } return false } // splitOpcode splits an opcode into its fixed and variable portions. // For example "05 iw" splits into "05" and "iw". func splitOpcode(x string) (fixed, variable string) { i := 0 for i < len(x) { c := x[i] if '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || c == ' ' || c == '.' || c == '+' { i++ continue } if i+2 <= len(x) && c == '/' { i += 2 continue } break } return strings.TrimSpace(x[:i]), x[i:] } // canGenerate reports whether opcode string x can generate opcode string y. 
// For example "D5 ib" can generate "D5 0A". // Any string x is not considered to generate itself. func canGenerate(x, y string) bool { i := 0 for i < len(x) && i < len(y) && x[i] == y[i] { i++ } if i == len(x) || i == len(y) { return false } switch x[i:] { case "ib": return len(y[i:]) == 2 && allHex(y[i:]) case "0+i": return len(y[i:]) == 1 && '0' <= y[i] && y[i] <= '7' case "8+i": return len(y[i:]) == 1 && (y[i] == '8' || y[i] == '9' || 'A' <= y[i] && y[i] <= 'F') } return false } // allHex reports whether s is entirely hex digits. func allHex(s string) bool { for _, c := range s { if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' { continue } return false } return true } // is16vs32pair reports whether x and y are the 16- and 32-bit variants of the same instruction, // based on analysis of the mnemonic syntax. func is16vs32pair(x, y *instruction) bool { return conv16.Replace(x.syntax) == y.syntax || strings.Replace(x.syntax, "r16, r/", "r32, r32/", -1) == y.syntax || // LSL etc strings.Replace(x.syntax, "r16", "r32", 1) == y.syntax // MOVSXD, MOVSX, etc } func containsAll(x string, targ ...string) bool { for _, y := range targ { i := strings.Index(x, y) if i < 0 { return false } x = x[i+len(y):] } return true }