1
2
3
4
5 package main
6
7 import (
8 "fmt"
9 "os"
10 "sort"
11 "strings"
12 )
13
14
15
16
17
18
19
20
21
// encodeReplace maps an {argument, encoding} pair to the canonical argument
// name that cleanup substitutes for it. The encoding half of the key is the
// encoding string after its " (r)", " (w)" or " (r, w)" suffix has been
// trimmed. Pairs that are absent leave the argument unchanged.
//
// Every key must appear exactly once: the keys are arrays, not constants, so
// the compiler does not reject duplicates and a later entry would silently
// override an earlier one.
var encodeReplace = map[[2]string]string{
	// mm operands.
	{"mm", "ModRM:reg"}:     "mm1",
	{"mm", "ModRM:r/m"}:     "mm2",
	{"mm1", "ModRM:r/m"}:    "mm2",
	{"mm2", "ModRM:reg"}:    "mm1",
	{"mm/m32", "ModRM:r/m"}: "mm2/m32",
	{"mm/m64", "ModRM:r/m"}: "mm2/m64",

	// xmm operands encoded in ModRM.
	{"xmm", "ModRM:reg"}:       "xmm1",
	{"xmm", "ModRM:r/m"}:       "xmm2",
	{"xmm0", "ModRM:reg"}:      "xmm1",
	{"xmm1", "ModRM:r/m"}:      "xmm2",
	{"xmm1/m16", "ModRM:r/m"}:  "xmm2/m16",
	{"xmm1/m32", "ModRM:r/m"}:  "xmm2/m32",
	{"xmm1/m64", "ModRM:r/m"}:  "xmm2/m64",
	{"xmm1/m128", "ModRM:r/m"}: "xmm2/m128",
	{"xmm1/m256", "ModRM:r/m"}: "xmm2/m256",
	{"xmm/m16", "ModRM:r/m"}:   "xmm2/m16",
	{"xmm/m32", "ModRM:r/m"}:   "xmm2/m32",
	{"xmm/m64", "ModRM:r/m"}:   "xmm2/m64",
	{"xmm/m128", "ModRM:r/m"}:  "xmm2/m128",
	{"xmm/m256", "ModRM:r/m"}:  "xmm2/m256",
	{"xmm3", "ModRM:reg"}:      "xmm1",
	{"xmm3", "ModRM:r/m"}:      "xmm2",
	{"xmm3/m16", "ModRM:r/m"}:  "xmm2/m16",
	{"xmm3/m32", "ModRM:r/m"}:  "xmm2/m32",
	{"xmm3/m64", "ModRM:r/m"}:  "xmm2/m64",
	{"xmm3/m128", "ModRM:r/m"}: "xmm2/m128",
	{"xmm3/m256", "ModRM:r/m"}: "xmm2/m256",
	{"xmm2", "ModRM:reg"}:      "xmm1",
	{"xmm2/m16", "ModRM:reg"}:  "xmm1/m16",
	{"xmm2/m32", "ModRM:reg"}:  "xmm1/m32",
	{"xmm2/m64", "ModRM:reg"}:  "xmm1/m64",
	{"xmm2/m128", "ModRM:reg"}: "xmm1/m128",
	{"xmm2/m256", "ModRM:reg"}: "xmm1/m256",

	// ymm operands encoded in ModRM.
	{"ymm", "ModRM:reg"}:       "ymm1",
	{"ymm", "ModRM:r/m"}:       "ymm2",
	{"ymm0", "ModRM:reg"}:      "ymm1",
	{"ymm1", "ModRM:r/m"}:      "ymm2",
	{"ymm1/m16", "ModRM:r/m"}:  "ymm2/m16",
	{"ymm1/m32", "ModRM:r/m"}:  "ymm2/m32",
	{"ymm1/m64", "ModRM:r/m"}:  "ymm2/m64",
	{"ymm1/m128", "ModRM:r/m"}: "ymm2/m128",
	{"ymm1/m256", "ModRM:r/m"}: "ymm2/m256",
	{"ymm3", "ModRM:reg"}:      "ymm1",
	{"ymm3", "ModRM:r/m"}:      "ymm2",
	{"ymm3/m16", "ModRM:r/m"}:  "ymm2/m16",
	{"ymm3/m32", "ModRM:r/m"}:  "ymm2/m32",
	{"ymm3/m64", "ModRM:r/m"}:  "ymm2/m64",
	{"ymm3/m128", "ModRM:r/m"}: "ymm2/m128",
	{"ymm3/m256", "ModRM:r/m"}: "ymm2/m256",
	{"ymm2", "ModRM:reg"}:      "ymm1",
	{"ymm2/m16", "ModRM:reg"}:  "ymm1/m16",
	{"ymm2/m32", "ModRM:reg"}:  "ymm1/m32",
	{"ymm2/m64", "ModRM:reg"}:  "ymm1/m64",
	{"ymm2/m128", "ModRM:reg"}: "ymm1/m128",
	{"ymm2/m256", "ModRM:reg"}: "ymm1/m256",

	// Vector operands encoded in VEX.vvvv.
	{"xmm1", "VEX.vvvv"}: "xmmV",
	{"xmm2", "VEX.vvvv"}: "xmmV",
	{"ymm1", "VEX.vvvv"}: "ymmV",
	{"ymm2", "VEX.vvvv"}: "ymmV",

	// Vector operands encoded in imm8[7:4].
	{"xmm4", "imm8[7:4]"}: "xmmIH",
	{"ymm4", "imm8[7:4]"}: "ymmIH",

	// Registers encoded in the opcode byte.
	{"r8", "opcode + rd"}:  "r8op",
	{"r16", "opcode + rd"}: "r16op",
	{"r32", "opcode + rd"}: "r32op",
	{"r64", "opcode + rd"}: "r64op",

	// reg/m operands.
	{"reg/m32", "ModRM:r/m"}: "r/m32",
	{"reg/m16", "ModRM:r/m"}: "r32/m16",

	// bnd operands.
	{"bnd", "ModRM:reg"}:       "bnd1",
	{"bnd2", "ModRM:reg"}:      "bnd1",
	{"bnd1/m64", "ModRM:r/m"}:  "bnd2/m64",
	{"bnd1/m128", "ModRM:r/m"}: "bnd2/m128",

	// General-purpose registers with a/b suffixes or encoded in VEX.vvvv.
	{"r32a", "ModRM:reg"}: "r32",
	{"r64a", "ModRM:reg"}: "r64",
	{"r32", "VEX.vvvv"}:   "r32V",
	{"r64", "VEX.vvvv"}:   "r64V",
	{"r32b", "VEX.vvvv"}:  "r32V",
	{"r64b", "VEX.vvvv"}:  "r64V",

	// ST written without an index.
	{"ST", "ST(0)"}: "ST(0)",
}
104
105
106
// encodings supplies per-operand encoding details keyed by instruction
// syntax. When cleanup finds an instruction's syntax in this table, the
// listed strings replace that instruction's args. Each string is an encoding
// name, optionally followed by an " (r)", " (w)" or " (r, w)" annotation.
var encodings = map[string][]string{
	// Floating-point forms (ST(i) stack operands and m*fp / m*int / m*byte memory operands).
	"FADD m32fp":            {"ModRM:r/m (r)"},
	"FADD m64fp":            {"ModRM:r/m (r)"},
	"FADD ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
	"FADD ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
	"FADDP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
	"FIADD m32int":          {"ModRM:r/m (r)"},
	"FIADD m16int":          {"ModRM:r/m (r)"},
	"FBLD m80dec":           {"ModRM:r/m (r)"},
	"FBSTP m80bcd":          {"ModRM:r/m (w)"},
	"FCMOVB ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVE ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVBE ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVU ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNB ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNE ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNU ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCOM m32fp":            {"ModRM:r/m (r)"},
	"FCOM m64fp":            {"ModRM:r/m (r)"},
	"FCOM ST(i)":            {"ST(i) (r)"},
	"FCOMP m32fp":           {"ModRM:r/m (r)"},
	"FCOMP m64fp":           {"ModRM:r/m (r)"},
	"FCOMP ST(i)":           {"ST(i) (r)"},
	"FCOMI ST, ST(i)":       {"ST(0) (r)", "ST(i) (r)"},
	"FCOMIP ST, ST(i)":      {"ST(0) (r)", "ST(i) (r)"},
	"FUCOMI ST, ST(i)":      {"ST(0) (r)", "ST(i) (r)"},
	"FUCOMIP ST, ST(i)":     {"ST(0) (r)", "ST(i) (r)"},
	"FDIV m32fp":            {"ModRM:r/m (r)"},
	"FDIV m64fp":            {"ModRM:r/m (r)"},
	"FDIV ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
	"FDIV ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
	"FDIVP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
	"FIDIV m16int":          {"ModRM:r/m (r)"},
	"FIDIV m32int":          {"ModRM:r/m (r)"},
	"FIDIV m64int":          {"ModRM:r/m (r)"},
	"FDIVR m32fp":           {"ModRM:r/m (r)"},
	"FDIVR m64fp":           {"ModRM:r/m (r)"},
	"FDIVR ST(0), ST(i)":    {"ST(0) (r, w)", "ST(i) (r)"},
	"FDIVR ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
	"FDIVRP ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
	"FIDIVR m16int":         {"ModRM:r/m (r)"},
	"FIDIVR m32int":         {"ModRM:r/m (r)"},
	"FIDIVR m64int":         {"ModRM:r/m (r)"},
	"FFREE ST(i)":           {"ST(i) (w)"},
	"FICOM m16int":          {"ModRM:r/m (r)"},
	"FICOM m32int":          {"ModRM:r/m (r)"},
	"FICOMP m16int":         {"ModRM:r/m (r)"},
	"FICOMP m32int":         {"ModRM:r/m (r)"},
	"FILD m16int":           {"ModRM:r/m (r)"},
	"FILD m32int":           {"ModRM:r/m (r)"},
	"FILD m64int":           {"ModRM:r/m (r)"},
	"FIST m16int":           {"ModRM:r/m (w)"},
	"FIST m32int":           {"ModRM:r/m (w)"},
	"FISTP m16int":          {"ModRM:r/m (w)"},
	"FISTP m32int":          {"ModRM:r/m (w)"},
	"FISTP m64int":          {"ModRM:r/m (w)"},
	"FISTTP m16int":         {"ModRM:r/m (w)"},
	"FISTTP m32int":         {"ModRM:r/m (w)"},
	"FISTTP m64int":         {"ModRM:r/m (w)"},
	"FLD m32fp":             {"ModRM:r/m (r)"},
	"FLD m64fp":             {"ModRM:r/m (r)"},
	"FLD m80fp":             {"ModRM:r/m (r)"},
	"FLD ST(i)":             {"ST(i) (r)"},
	"FLDCW m2byte":          {"ModRM:r/m (r)"},
	"FLDENV m14/28byte":     {"ModRM:r/m (r)"},
	"FMUL m32fp":            {"ModRM:r/m (r)"},
	"FMUL m64fp":            {"ModRM:r/m (r)"},
	"FMUL ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
	"FMUL ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
	"FMULP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
	"FIMUL m16int":          {"ModRM:r/m (r)"},
	"FIMUL m32int":          {"ModRM:r/m (r)"},
	"FRSTOR m94/108byte":    {"ModRM:r/m (r)"},
	"FSAVE m94/108byte":     {"ModRM:r/m (w)"},
	"FNSAVE m94/108byte":    {"ModRM:r/m (w)"},
	"FST m32fp":             {"ModRM:r/m (w)"},
	"FST m64fp":             {"ModRM:r/m (w)"},
	"FST m80fp":             {"ModRM:r/m (w)"},
	"FST ST(i)":             {"ST(i) (w)"},
	"FSTP m32fp":            {"ModRM:r/m (w)"},
	"FSTP m64fp":            {"ModRM:r/m (w)"},
	"FSTP m80fp":            {"ModRM:r/m (w)"},
	"FSTP ST(i)":            {"ST(i) (w)"},
	"FSTCW m2byte":          {"ModRM:r/m (w)"},
	"FNSTCW m2byte":         {"ModRM:r/m (w)"},
	"FSTENV m14/28byte":     {"ModRM:r/m (w)"},
	"FNSTENV m14/28byte":    {"ModRM:r/m (w)"},
	"FSTSW m2byte":          {"ModRM:r/m (w)"},
	"FSTSW AX":              {"AX (w)"},
	"FNSTSW m2byte":         {"ModRM:r/m (w)"},
	"FNSTSW AX":             {"AX (w)"},
	"FSUB m32fp":            {"ModRM:r/m (r)"},
	"FSUB m64fp":            {"ModRM:r/m (r)"},
	"FSUB ST(0), ST(i)":     {"ST(0) (r, w)", "ST(i) (r)"},
	"FSUB ST(i), ST(0)":     {"ST(i) (r, w)", "ST(0) (r)"},
	"FSUBP ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
	"FISUB m16int":          {"ModRM:r/m (r)"},
	"FISUB m32int":          {"ModRM:r/m (r)"},
	"FSUBR m32fp":           {"ModRM:r/m (r)"},
	"FSUBR m64fp":           {"ModRM:r/m (r)"},
	"FSUBR ST(0), ST(i)":    {"ST(0) (r, w)", "ST(i) (r)"},
	"FSUBR ST(i), ST(0)":    {"ST(i) (r, w)", "ST(0) (r)"},
	"FSUBRP ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
	"FISUBR m16int":         {"ModRM:r/m (r)"},
	"FISUBR m32int":         {"ModRM:r/m (r)"},
	"FISUBR m64int":         {"ModRM:r/m (r)"},
	"FUCOM ST(i)":           {"ST(i) (r)"},
	"FUCOMP ST(i)":          {"ST(i) (r)"},
	"FXCH ST(i)":            {"ST(i) (r, w)"},

	// Segment-register POP/PUSH forms and INT 3, whose operand is named literally.
	"POP DS":  {"DS (w)"},
	"POP ES":  {"ES (w)"},
	"POP FS":  {"FS (w)"},
	"POP GS":  {"GS (w)"},
	"POP SS":  {"SS (w)"},
	"POP CS":  {"CS (w)"},
	"PUSH CS": {"CS (r)"},
	"PUSH DS": {"DS (r)"},
	"PUSH ES": {"ES (r)"},
	"PUSH FS": {"FS (r)"},
	"PUSH GS": {"GS (r)"},
	"PUSH SS": {"SS (r)"},
	"INT 3":   {"3 (r)"},

	// Bounds load/store through a mib operand.
	"BNDLDX bnd, mib": {"ModRM:reg (w)", "ModRM:r/m (r)"},
	"BNDSTX mib, bnd": {"ModRM:r/m (r)", "ModRM:reg (r)"},

	// Relative calls, port I/O and accumulator exchanges.
	"CALL rel16":    {"Offset"},
	"CALL rel32":    {"Offset"},
	"IN AL, imm8":   {"AL (w)", "imm8 (r)"},
	"IN AX, imm8":   {"AX (w)", "imm8 (r)"},
	"IN EAX, imm8":  {"EAX (w)", "imm8 (r)"},
	"IN AL, DX":     {"AL (w)", "DX (r)"},
	"IN AX, DX":     {"AX (w)", "DX (r)"},
	"IN EAX, DX":    {"EAX (w)", "DX (r)"},
	"OUT DX, AL":    {"DX (r)", "AL (r)"},
	"OUT DX, AX":    {"DX (r)", "AX (r)"},
	"OUT DX, EAX":   {"DX (r)", "EAX (r)"},
	"OUT imm8, AL":  {"imm8 (r)", "AL (r)"},
	"OUT imm8, AX":  {"imm8 (r)", "AX (r)"},
	"OUT imm8, EAX": {"imm8 (r)", "EAX (r)"},
	"XCHG AX, r16":  {"AX (r, w)", "opcode + rd (r, w)"},
	"XCHG EAX, r32": {"EAX (r, w)", "opcode + rd (r, w)"},
	"XCHG RAX, r64": {"RAX (r, w)", "opcode + rd (r, w)"},

	// INVEPT, INVVPID and the VM* instructions.
	"INVEPT r32, m128":   {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVEPT r64, m128":   {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVVPID r32, m128":  {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVVPID r64, m128":  {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMREAD r/m32, r32":  {"ModRM:r/m (w)", "ModRM:reg (r)"},
	"VMREAD r/m64, r64":  {"ModRM:r/m (w)", "ModRM:reg (r)"},
	"VMWRITE r32, r/m32": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMWRITE r64, r/m64": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMCLEAR m64":        {"ModRM:r/m (w)"},
	"VMPTRLD m64":        {"ModRM:r/m (r)"},
	"VMPTRST m64":        {"ModRM:r/m (w)"},
	"VMXON m64":          {"ModRM:r/m (r)"},
}
268
269
270
// opAction gives fallback read/write actions per operand, keyed by
// instruction mnemonic. cleanup consults entry i for operand i only when the
// operand's encoding carries no " (r)", " (w)" or " (r, w)" annotation and is
// not an immediate, "Offset", "iw", or a literal 0/1/3 argument. An operand
// index past the end of the list is reported by cleanup as having no r/w
// annotation.
//
// Each list has one action per operand of the instruction; SUB takes two
// operands, like ADC, ADD and SBB.
var opAction = map[string][]string{
	"ADC":         {"rw", "r"},
	"ADD":         {"rw", "r"},
	"AND":         {"rw", "r"},
	"BLENDVPD":    {"rw", "r", "r"},
	"BLENDVPS":    {"rw", "r", "r"},
	"IN":          {"w", "r"},
	"MOV":         {"w", "r"},
	"OR":          {"rw", "r"},
	"OUT":         {"r", "r"},
	"PBLENDVB":    {"rw", "r", "r"},
	"RCL":         {"rw", "r"},
	"RCR":         {"rw", "r"},
	"ROL":         {"rw", "r"},
	"ROR":         {"rw", "r"},
	"SAL":         {"rw", "r"},
	"SAR":         {"rw", "r"},
	"SBB":         {"rw", "r"},
	"SHL":         {"rw", "r"},
	"SHLD":        {"rw", "r", "r"},
	"SHR":         {"rw", "r"},
	"SHRD":        {"rw", "r", "r"},
	"SUB":         {"rw", "r"},
	"TEST":        {"r", "r"},
	"VBLENDVPD":   {"rw", "r", "r"},
	"VBLENDVPS":   {"rw", "r", "r"},
	"VPBLENDVB":   {"rw", "r", "r"},
	"VPMASKMOVD":  {"w", "r", "r"},
	"VPMASKMOVQ":  {"w", "r", "r"},
	"VPSLLVD":     {"w", "r", "r"},
	"VPSRAVD":     {"w", "r", "r"},
	"VPSRLVD":     {"w", "r", "r"},
	"VPSRLVQ":     {"w", "r", "r"},
	"VINSERTI128": {"w", "r", "r"},
	"VPBLENDD":    {"w", "r", "r"},
	"VPERMD":      {"w", "r", "r"},
	"VPERMPS":     {"w", "r", "r"},
	"VPERM2I128":  {"w", "r", "r"},
	"VPSLLVQ":     {"w", "r", "r"},
	"XCHG":        {"rw", "rw"},
	"XOR":         {"rw", "r"},
}
313
314
315
// encodeOK is the set of {argument, encoding} pairs that cleanup accepts.
// After an operand's argument name and encoding have been normalized,
// cleanup looks the pair up here and prints an "invalid encoding" diagnostic
// (including a ready-to-paste table entry) when it is absent.
//
// Every key must appear exactly once: the keys are arrays, not constants, so
// the compiler does not reject duplicates.
var encodeOK = map[[2]string]bool{
	// Literal and implicit operands.
	{"0", "imm8"}:                 true,
	{"1", "1"}:                    true,
	{"1", "imm8"}:                 true,
	{"3", "3"}:                    true,
	{"<XMM0>", "<XMM0>"}:          true,
	{"<XMM0>", "implicit XMM0"}:   true,

	// Named registers and register classes.
	{"AL", "AL"}:             true,
	{"AL", "AL/AX/EAX/RAX"}:  true,
	{"AX", "AL/AX/EAX/RAX"}:  true,
	{"AX", "AX"}:             true,
	{"AX", "AX/EAX/RAX"}:     true,
	{"CL", "CL"}:             true,
	{"CR0-CR7", "ModRM:reg"}: true,
	{"CR8", ""}:              true,
	{"CS", "CS"}:             true,
	{"DR0-DR7", "ModRM:reg"}: true,
	{"DS", "DS"}:             true,
	{"DX", "DX"}:             true,
	{"EAX", "AL/AX/EAX/RAX"}: true,
	{"EAX", "AX/EAX/RAX"}:    true,
	{"EAX", "EAX"}:           true,
	{"ES", "ES"}:             true,
	{"FS", "FS"}:             true,
	{"GS", "GS"}:             true,
	{"RAX", "AL/AX/EAX/RAX"}: true,
	{"RAX", "AX/EAX/RAX"}:    true,
	{"RAX", "RAX"}:           true,
	{"SS", "SS"}:             true,
	{"ST", "ST(0)"}:          true,
	{"ST(0)", "ST(0)"}:       true,
	{"ST(i)", "ST(i)"}:       true,
	{"Sreg", "ModRM:reg"}:    true,

	// bnd operands.
	{"bnd1", "ModRM:reg"}:      true,
	{"bnd2/m128", "ModRM:r/m"}: true,
	{"bnd2/m64", "ModRM:r/m"}:  true,

	// Immediates.
	{"imm16", "imm16"}:         true,
	{"imm16", "imm8"}:          true,
	{"imm16", "imm8/16/32"}:    true,
	{"imm16", "imm8/16/32/64"}: true,
	{"imm16", "iw"}:            true,
	{"imm32", "imm8"}:          true,
	{"imm32", "imm8/16/32"}:    true,
	{"imm32", "imm8/16/32/64"}: true,
	{"imm64", "imm8/16/32/64"}: true,
	{"imm8", "imm8"}:           true,
	{"imm8", "imm8/16/32"}:     true,
	{"imm8", "imm8/16/32/64"}:  true,
	{"imm8", "imm8[3:0]"}:      true,

	// Memory operands.
	{"m", "ModRM:r/m"}:           true,
	{"m128", "ModRM:r/m"}:        true,
	{"m14/28byte", "ModRM:r/m"}:  true,
	{"m16", "ModRM:r/m"}:         true,
	{"m16&16", "ModRM:r/m"}:      true,
	{"m16&32", "ModRM:r/m"}:      true,
	{"m16&64", "ModRM:r/m"}:      true,
	{"m16:16", "ModRM:r/m"}:      true,
	{"m16:16", "Offset"}:         true,
	{"m16:32", "ModRM:r/m"}:      true,
	{"m16:32", "Offset"}:         true,
	{"m16:64", "ModRM:r/m"}:      true,
	{"m16:64", "Offset"}:         true,
	{"m16int", "ModRM:r/m"}:      true,
	{"m256", "ModRM:r/m"}:        true,
	{"m2byte", "ModRM:r/m"}:      true,
	{"m32", "ModRM:r/m"}:         true,
	{"m32&32", "ModRM:r/m"}:      true,
	{"m32fp", "ModRM:r/m"}:       true,
	{"m32int", "ModRM:r/m"}:      true,
	{"m512byte", "ModRM:r/m"}:    true,
	{"m64", "ModRM:r/m"}:         true,
	{"m64fp", "ModRM:r/m"}:       true,
	{"m64int", "ModRM:r/m"}:      true,
	{"m8", "ModRM:r/m"}:          true,
	{"m80bcd", "ModRM:r/m"}:      true,
	{"m80dec", "ModRM:r/m"}:      true,
	{"m80fp", "ModRM:r/m"}:       true,
	{"m94/108byte", "ModRM:r/m"}: true,
	{"mem", "ModRM:r/m"}:         true,
	{"mib", "ModRM:r/m"}:         true,

	// mm operands.
	{"mm/m32", "ModRM:r/m"}:  true,
	{"mm1", "ModRM:reg"}:     true,
	{"mm2", "ModRM:r/m"}:     true,
	{"mm2/m32", "ModRM:r/m"}: true,
	{"mm2/m64", "ModRM:r/m"}: true,

	// Memory offsets and far pointers.
	{"moffs16", "Moffs"}:   true,
	{"moffs32", "Moffs"}:   true,
	{"moffs64", "Moffs"}:   true,
	{"moffs8", "Moffs"}:    true,
	{"ptr16:16", "Offset"}: true,
	{"ptr16:32", "Offset"}: true,

	// General-purpose register and register-or-memory operands.
	{"r/m16", "ModRM:r/m"}:   true,
	{"r/m32", "ModRM:r/m"}:   true,
	{"r/m64", "ModRM:r/m"}:   true,
	{"r/m8", "ModRM:r/m"}:    true,
	{"r16", "ModRM:reg"}:     true,
	{"r16op", "opcode + rd"}: true,
	{"r32", "ModRM:reg"}:     true,
	{"r32", "VEX.vvvv"}:      true,
	{"r32/m16", "ModRM:r/m"}: true,
	{"r32/m8", "ModRM:r/m"}:  true,
	{"r32V", "VEX.vvvv"}:     true,
	{"r32op", "opcode + rd"}: true,
	{"r64", "ModRM:reg"}:     true,
	{"r64/m16", "ModRM:r/m"}: true,
	{"r64V", "VEX.vvvv"}:     true,
	{"r64op", "opcode + rd"}: true,
	{"r8", "ModRM:reg"}:      true,
	{"r8op", "opcode + rd"}:  true,

	// Relative offsets.
	{"rel16", "Offset"}: true,
	{"rel32", "Offset"}: true,
	{"rel8", "Offset"}:  true,

	// Register-only ModRM:r/m operands.
	{"rmr16", "ModRM:r/m"}: true,
	{"rmr32", "ModRM:r/m"}: true,
	{"rmr64", "ModRM:r/m"}: true,

	// xmm and ymm operands.
	{"xmm/m128", "ModRM:r/m"}:  true,
	{"xmm/m32", "ModRM:r/m"}:   true,
	{"xmm1", "ModRM:reg"}:      true,
	{"xmm2", "ModRM:r/m"}:      true,
	{"xmm2/m128", "ModRM:r/m"}: true,
	{"xmm2/m16", "ModRM:r/m"}:  true,
	{"xmm2/m32", "ModRM:r/m"}:  true,
	{"xmm2/m64", "ModRM:r/m"}:  true,
	{"xmm2/m8", "ModRM:r/m"}:   true,
	{"xmmIH", "imm8[7:4]"}:     true,
	{"xmmV", "VEX.vvvv"}:       true,
	{"ymm1", "ModRM:reg"}:      true,
	{"ymm2", "ModRM:r/m"}:      true,
	{"ymm2/m256", "ModRM:r/m"}: true,
	{"ymmIH", "imm8[7:4]"}:     true,
	{"ymmV", "VEX.vvvv"}:       true,

	// vsib operands.
	{"vm32x", "vsib"}: true,
	{"vm64x", "vsib"}: true,
	{"vm32y", "vsib"}: true,
	{"vm64y", "vsib"}: true,
}
455
456
457
458
459
// instBlacklist is a set of instruction syntaxes: the explicit-operand
// spellings of CMPS, INS, LODS, MOVS, OUTS, SCAS, STOS and XLAT, with and
// without a REP/REPE/REPNE prefix.
// NOTE(review): the consumer is outside this chunk; presumably these
// syntaxes are dropped from the output — confirm against the caller.
var instBlacklist = map[string]bool{
	// Unprefixed forms.
	"CMPS m16, m16": true,
	"CMPS m32, m32": true,
	"CMPS m64, m64": true,
	"CMPS m8, m8":   true,
	"INS m16, DX":   true,
	"INS m32, DX":   true,
	"INS m8, DX":    true,
	"LODS m16":      true,
	"LODS m32":      true,
	"LODS m64":      true,
	"LODS m8":       true,
	"MOVS m16, m16": true,
	"MOVS m32, m32": true,
	"MOVS m64, m64": true,
	"MOVS m8, m8":   true,
	"OUTS DX, m16":  true,
	"OUTS DX, m32":  true,
	"OUTS DX, m8":   true,
	"SCAS m16":      true,
	"SCAS m32":      true,
	"SCAS m64":      true,
	"SCAS m8":       true,
	"STOS m16":      true,
	"STOS m32":      true,
	"STOS m64":      true,
	"STOS m8":       true,
	"XLAT m8":       true,

	// REP, REPE and REPNE forms.
	"REP INS m16, DX":     true,
	"REP INS m32, DX":     true,
	"REP INS m8, DX":      true,
	"REP INS r/m32, DX":   true,
	"REP LODS AL":         true,
	"REP LODS AX":         true,
	"REP LODS EAX":        true,
	"REP LODS RAX":        true,
	"REP MOVS m16, m16":   true,
	"REP MOVS m32, m32":   true,
	"REP MOVS m64, m64":   true,
	"REP MOVS m8, m8":     true,
	"REP OUTS DX, m16":    true,
	"REP OUTS DX, m32":    true,
	"REP OUTS DX, m8":     true,
	"REP OUTS DX, r/m16":  true,
	"REP OUTS DX, r/m32":  true,
	"REP OUTS DX, r/m8":   true,
	"REP STOS m16":        true,
	"REP STOS m32":        true,
	"REP STOS m64":        true,
	"REP STOS m8":         true,
	"REPE CMPS m16, m16":  true,
	"REPE CMPS m32, m32":  true,
	"REPE CMPS m64, m64":  true,
	"REPE CMPS m8, m8":    true,
	"REPE SCAS m16":       true,
	"REPE SCAS m32":       true,
	"REPE SCAS m64":       true,
	"REPE SCAS m8":        true,
	"REPNE CMPS m16, m16": true,
	"REPNE CMPS m32, m32": true,
	"REPNE CMPS m64, m64": true,
	"REPNE CMPS m8, m8":   true,
	"REPNE SCAS m16":      true,
	"REPNE SCAS m32":      true,
	"REPNE SCAS m64":      true,
	"REPNE SCAS m8":       true,
}
527
528
529
// condPrefs pairs up condition-code suffixes that name the same condition
// (for example B with C and NAE, E with Z).
// NOTE(review): the consumer is outside this chunk; presumably the first
// element of each pair is the preferred spelling and the second a synonym —
// confirm against the caller.
var condPrefs = [][2]string{
	// Carry flag.
	{"B", "C"},
	{"B", "NAE"},
	{"AE", "NB"},
	{"AE", "NC"},

	// Zero flag.
	{"E", "Z"},
	{"NE", "NZ"},

	// Below-or-equal / above.
	{"BE", "NA"},
	{"A", "NBE"},

	// Parity flag.
	{"P", "PE"},
	{"NP", "PO"},

	// Signed comparisons.
	{"L", "NGE"},
	{"GE", "NL"},
	{"LE", "NG"},
	{"G", "NLE"},
}
546
547
548
549
550
// conv16 rewrites 16-bit spellings to their 32-bit counterparts: the size
// "16" becomes "32" (with "16:16" becoming "16:32"), AX becomes EAX, and the
// listed mnemonics are replaced by their 32-bit names.
//
// The pairs are given in priority order; "16:16" must come before "16" so
// that the longer match is taken first.
var conv16 = strings.NewReplacer(
	// Operand sizes and the accumulator.
	"16:16", "16:32",
	"16", "32",
	"AX", "EAX",

	// Mnemonics.
	"CBW", "CWDE",
	"CMPSW", "CMPSD",
	"CWD", "CDQ",
	"INSW", "INSD",
	"IRET", "IRETD",
	"LODSW", "LODSD",
	"MOVSW", "MOVSD",
	"OUTSW", "OUTSD",
	"POPA", "POPAD",
	"POPF", "POPFD",
	"PUSHA", "PUSHAD",
	"PUSHF", "PUSHFD",
	"SCASW", "SCASD",
	"STOSW", "STOSD",
)
570
571
572
573 var fixup = map[[2]string][]fixer{
574
575
576 {"NOP", "90"}: {fixAddTag("pseudo")},
577
578
579 {"PAUSE", "F3 90"}: {fixAddTag("pseudo")},
580
581
582 {"CALL m16:16", "FF /3"}: {fixRename("CALL_FAR")},
583 {"CALL m16:32", "FF /3"}: {fixRename("CALL_FAR")},
584 {"CALL m16:64", "REX.W FF /3"}: {fixRename("CALL_FAR")},
585 {"CALL ptr16:16", "9A cd"}: {fixRename("CALL_FAR")},
586 {"CALL ptr16:32", "9A cp"}: {fixRename("CALL_FAR")},
587 {"JMP m16:16", "FF /5"}: {fixRename("JMP_FAR")},
588 {"JMP m16:32", "FF /5"}: {fixRename("JMP_FAR")},
589 {"JMP m16:64", "REX.W FF /5"}: {fixRename("JMP_FAR")},
590 {"JMP ptr16:16", "EA cd"}: {fixRename("JMP_FAR")},
591 {"JMP ptr16:32", "EA cp"}: {fixRename("JMP_FAR")},
592 {"RET imm16", "CA iw"}: {fixRename("RET_FAR"), fixArg(0, "imm16u")},
593 {"RET", "CB"}: {fixRename("RET_FAR")},
594
595
596
597 {"ENTER imm16, imm8", "C8 iw ib"}: {fixArg(1, "imm8b")},
598 {"RET imm16", "C2 iw"}: {fixArg(0, "imm16u")},
599 {"IN AL, imm8", "E4 ib"}: {fixArg(1, "imm8u")},
600 {"IN AX, imm8", "E5 ib"}: {fixArg(1, "imm8u")},
601 {"IN EAX, imm8", "E5 ib"}: {fixArg(1, "imm8u"), fixAddTag("operand64")},
602 {"OUT imm8, AL", "E6 ib"}: {fixArg(0, "imm8u")},
603 {"OUT imm8, AX", "E7 ib"}: {fixArg(0, "imm8u")},
604 {"OUT imm8, EAX", "E7 ib"}: {fixArg(0, "imm8u"), fixAddTag("operand64")},
605 {"MOV r8op, imm8", "B0+rb ib"}: {fixArg(1, "imm8u")},
606 {"MOV r8op, imm8", "REX B0+rb ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
607 {"MOV r/m8, imm8", "C6 /0 ib"}: {fixArg(1, "imm8u")},
608 {"MOV r/m8, imm8", "REX C6 /0 ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
609
610
611
612
613
614 {"MOVSX r32, r/m16", "0F BF /r"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
615 {"MOVZX r32, r/m16", "0F B7 /r"}: {fixRemoveTag("operand16")},
616
617
618 {"SLDT r/m16", "0F 00 /0"}: {fixRemoveTag("operand32")},
619 {"STR r/m16", "0F 00 /1"}: {fixAddTag("operand16")},
620 {"BSWAP r32op", "0F C8+rd"}: {fixRemoveTag("operand16")},
621 {"MOV Sreg, r/m16", "8E /r"}: {fixRemoveTag("operand32")},
622 {"MOV Sreg, r/m64", "REX.W 8E /r"}: {fixArg(1, "r/m16")},
623 {"MOV r/m64, Sreg", "REX.W 8C /r"}: {fixArg(0, "r/m16")},
624 {"MOV r/m16, Sreg", "8C /r"}: {fixRemoveTag("operand32")},
625 {"MOV r/m64, imm32", "REX.W C7 /0 io"}: {fixOpcode("REX.W C7 /0 id")},
626
627
628 {"POP FS", "0F A1"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
629 {"POP GS", "0F A9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
630 {"LEAVE", "C9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
631
632 {"IN EAX, DX", "ED"}: {fixAddTag("operand64")},
633 {"INSD", "6D"}: {fixAddTag("operand64")},
634 {"OUT DX, EAX", "EF"}: {fixAddTag("operand64")},
635 {"OUTSD", "6F"}: {fixAddTag("operand64")},
636 {"XBEGIN rel32", "C7 F8 cd"}: {fixAddTag("operand64")},
637
638
639 {"FWAIT", "9B"}: {fixRemoveTag("pseudo")},
640 {"WAIT", "9B"}: {fixAddTag("pseudo")},
641
642
643
644
645 {"LAHF", "9F"}: {fixValid("V", "V")},
646 {"SAHF", "9E"}: {fixValid("V", "V")},
647
648
649 {"JZ rel16", "0F 84 cw"}: {fixAddTag("operand16"), fixRemoveTag("operand32")},
650 {"JZ rel32", "0F 84 cd"}: {fixAddTag("operand32"), fixRemoveTag("operand16")},
651
652
653
654 {"XCHG r/m16, r16", "87 /r"}: {fixRemoveTag("pseudo")},
655
656
657 {"MOV rmr64, CR8", "REX.R + 0F 20 /0"}: {fixAddTag("pseudo")},
658 {"MOV CR8, rmr64", "REX.R + 0F 22 /0"}: {fixAddTag("pseudo")},
659
660
661 {"ADCX r32, r/m32", "66 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
662 {"ADOX r32, r/m32", "F3 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
663 {"POPFQ", "9D"}: {fixAddTag("operand32"), fixAddTag("operand64")},
664 {"PUSHFQ", "9C"}: {fixAddTag("operand32"), fixAddTag("operand64")},
665 {"JCXZ rel8", "E3 cb"}: {fixAddTag("address16")},
666 {"JECXZ rel8", "E3 cb"}: {fixAddTag("address32")},
667 {"JRCXZ rel8", "E3 cb"}: {fixAddTag("address64")},
668 {"PUSH r64op", "50+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")},
669 {"PUSH r/m64", "FF /6"}: {fixAddTag("operand32"), fixAddTag("operand64")},
670 {"POP r64op", "58+rd"}: {fixAddTag("operand32"), fixAddTag("operand64")},
671 {"POP r/m64", "8F /0"}: {fixAddTag("operand32"), fixAddTag("operand64")},
672 {"SMSW r/m16", "0F 01 /4"}: {fixAddTag("operand16")},
673 {"SMSW r32/m16", "0F 01 /4"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
674
675
676 {"JA rel16", "0F 87 cw"}: {fixAddTag("operand16")},
677 {"JAE rel16", "0F 83 cw"}: {fixAddTag("operand16")},
678 {"JB rel16", "0F 82 cw"}: {fixAddTag("operand16")},
679 {"JBE rel16", "0F 86 cw"}: {fixAddTag("operand16")},
680 {"JE rel16", "0F 84 cw"}: {fixAddTag("operand16")},
681 {"JG rel16", "0F 8F cw"}: {fixAddTag("operand16")},
682 {"JGE rel16", "0F 8D cw"}: {fixAddTag("operand16")},
683 {"JL rel16", "0F 8C cw"}: {fixAddTag("operand16")},
684 {"JLE rel16", "0F 8E cw"}: {fixAddTag("operand16")},
685 {"JNE rel16", "0F 85 cw"}: {fixAddTag("operand16")},
686 {"JNO rel16", "0F 81 cw"}: {fixAddTag("operand16")},
687 {"JNP rel16", "0F 8B cw"}: {fixAddTag("operand16")},
688 {"JNS rel16", "0F 89 cw"}: {fixAddTag("operand16")},
689 {"JO rel16", "0F 80 cw"}: {fixAddTag("operand16")},
690 {"JP rel16", "0F 8A cw"}: {fixAddTag("operand16")},
691 {"JS rel16", "0F 88 cw"}: {fixAddTag("operand16")},
692
693 {"JA rel32", "0F 87 cd"}: {fixAddTag("operand32")},
694 {"JAE rel32", "0F 83 cd"}: {fixAddTag("operand32")},
695 {"JB rel32", "0F 82 cd"}: {fixAddTag("operand32")},
696 {"JBE rel32", "0F 86 cd"}: {fixAddTag("operand32")},
697 {"JE rel32", "0F 84 cd"}: {fixAddTag("operand32")},
698 {"JG rel32", "0F 8F cd"}: {fixAddTag("operand32")},
699 {"JGE rel32", "0F 8D cd"}: {fixAddTag("operand32")},
700 {"JL rel32", "0F 8C cd"}: {fixAddTag("operand32")},
701 {"JLE rel32", "0F 8E cd"}: {fixAddTag("operand32")},
702 {"JNE rel32", "0F 85 cd"}: {fixAddTag("operand32")},
703 {"JNO rel32", "0F 81 cd"}: {fixAddTag("operand32")},
704 {"JNP rel32", "0F 8B cd"}: {fixAddTag("operand32")},
705 {"JNS rel32", "0F 89 cd"}: {fixAddTag("operand32")},
706 {"JO rel32", "0F 80 cd"}: {fixAddTag("operand32")},
707 {"JP rel32", "0F 8A cd"}: {fixAddTag("operand32")},
708 {"JS rel32", "0F 88 cd"}: {fixAddTag("operand32")},
709
710 {"LSL r16, r/m16", "0F 03 /r"}: {fixAddTag("operand16")},
711 }
712
713 var extraInsts = []*instruction{
714
715 {syntax: "ICEBP", opcode: "F1", valid32: "V", valid64: "V"},
716 {syntax: "UD1", opcode: "0F B9", valid32: "V", valid64: "V"},
717 {syntax: "FFREEP ST(i)", opcode: "DF C0+i", valid32: "V", valid64: "V", action: "w"},
718
719
720 {syntax: "MOVNTSD m64, xmm1", opcode: "F2 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
721 {syntax: "MOVNTSS m32, xmm1", opcode: "F3 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
722
723
724
725 {syntax: "CALL rel32", opcode: "E8 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
726 {syntax: "JMP rel32", opcode: "E9 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
727 {syntax: "JA rel32", opcode: "0F 87 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
728 {syntax: "JAE rel32", opcode: "0F 83 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
729 {syntax: "JB rel32", opcode: "0F 82 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
730 {syntax: "JBE rel32", opcode: "0F 86 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
731 {syntax: "JE rel32", opcode: "0F 84 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
732 {syntax: "JG rel32", opcode: "0F 8F cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
733 {syntax: "JGE rel32", opcode: "0F 8D cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
734 {syntax: "JL rel32", opcode: "0F 8C cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
735 {syntax: "JLE rel32", opcode: "0F 8E cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
736 {syntax: "JNE rel32", opcode: "0F 85 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
737 {syntax: "JNO rel32", opcode: "0F 81 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
738 {syntax: "JNP rel32", opcode: "0F 8B cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
739 {syntax: "JNS rel32", opcode: "0F 89 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
740 {syntax: "JO rel32", opcode: "0F 80 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
741 {syntax: "JP rel32", opcode: "0F 8A cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
742 {syntax: "JS rel32", opcode: "0F 88 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
743
744
745
746
747
748 {syntax: "MOVSX r16, r/m16", opcode: "0F BF /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
749 {syntax: "MOVSXD r16, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
750 {syntax: "MOVSXD r32, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
751 {syntax: "MOVZX r16, r/m16", opcode: "0F B7 /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
752
753 {syntax: "LAR r64, r/m16", opcode: "REX.W 0F 02 /r", valid32: "N.E.", valid64: "V", action: "w,r"},
754 {syntax: "SLDT r32/m16", opcode: "0F 00 /0", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
755 {syntax: "STR r32/m16", opcode: "0F 00 /1", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
756 {syntax: "STR r64/m16", opcode: "REX.W 0F 00 /1", valid32: "N.E.", valid64: "V", action: "w"},
757
758 {syntax: "BSWAP r16op", opcode: "0F C8+rd", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "rw"},
759
760
761
762
763
764
765
766 {syntax: "MOV Sreg, r32/m16", opcode: "8E /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
767 {syntax: "MOV r/m32, Sreg", opcode: "8C /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
768 }
769
// A fixer edits a single instruction in place. The fix* constructors below
// each return one, and the fixup table associates lists of them with
// instructions.
type fixer func(*instruction)
771
772 func fixAddTag(tag string) fixer {
773 return func(inst *instruction) {
774 addTag(inst, tag)
775 }
776 }
777
778 func fixRemoveTag(tag string) fixer {
779 return func(inst *instruction) {
780 removeTag(inst, tag)
781 }
782 }
783
784 func fixRename(op string) fixer {
785 return func(inst *instruction) {
786 _, args := splitSyntax(inst.syntax)
787 inst.syntax = joinSyntax(op, args)
788 }
789 }
790
791 func fixArg(i int, arg string) fixer {
792 return func(inst *instruction) {
793 op, args := splitSyntax(inst.syntax)
794 args[i] = arg
795 inst.syntax = joinSyntax(op, args)
796 }
797 }
798
799 func fixIfValid(valid32, valid64 string, fix fixer) fixer {
800 return func(inst *instruction) {
801 if inst.valid32 == valid32 && inst.valid64 == valid64 {
802 fix(inst)
803 }
804 }
805 }
806
807 func fixValid(valid32, valid64 string) fixer {
808 return func(inst *instruction) {
809 inst.valid32 = valid32
810 inst.valid64 = valid64
811 }
812 }
813
814 func fixOpcode(opcode string) fixer {
815 return func(inst *instruction) {
816 inst.opcode = opcode
817 }
818 }
819
820 func cleanup(insts []*instruction) []*instruction {
821 var haveOp map[string]bool
822 if onlySomePages {
823 haveOp = map[string]bool{}
824 }
825
826
827 sawJZ := map[string]bool{}
828 out := insts[:0]
829 for seq, inst := range insts {
830 inst.seq = seq
831
832
833 if strings.HasPrefix(inst.syntax, "JZ rel") {
834 if sawJZ[inst.syntax] {
835 continue
836 }
837 sawJZ[inst.syntax] = true
838 }
839 out = append(out, inst)
840
841
842 surprises := []string{
843 " m64",
844 " m128",
845 }
846 for _, s := range surprises {
847 if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) {
848 inst.opcode = strings.TrimSuffix(inst.opcode, s)
849 }
850 }
851
852 op, args := splitSyntax(inst.syntax)
853 op = strings.TrimRight(op, "*")
854 inst.syntax = joinSyntax(op, args)
855
856
857 if enc, ok := encodings[inst.syntax]; ok {
858 inst.args = enc
859 }
860 if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" {
861 fixed := make([]string, len(args))
862 copy(fixed, inst.args)
863 fixed[len(args)-1] = "imm8"
864 inst.args = fixed
865 } else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" {
866 inst.args = []string{}
867 } else if len(args) != len(inst.args) {
868 fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; "))
869 inst.syntax = joinSyntax(op, args)
870 continue
871 }
872
873 var action []string
874 for i, arg := range args {
875 arg = strings.TrimSpace(arg)
876 arg = strings.TrimRight(arg, "*")
877 if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") {
878 arg = "r32" + strings.TrimPrefix(arg, "reg")
879 }
880
881 enc := inst.args[i]
882 enc = strings.TrimSpace(enc)
883 switch {
884 case strings.HasSuffix(enc, " (r))"):
885 enc = strings.TrimSuffix(enc, ")")
886 case strings.HasSuffix(enc, " (R)"):
887 enc = strings.TrimSuffix(enc, " (R)") + " (r)"
888 case strings.HasSuffix(enc, " (W)"):
889 enc = strings.TrimSuffix(enc, " (W)") + " (w)"
890 case strings.HasSuffix(enc, " (r,w)"):
891 enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)"
892 case enc == "Imm8":
893 enc = "imm8"
894 case enc == "imm8/26/32":
895 enc = "imm8/16/32"
896 case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index":
897 enc = "vsib (r)"
898 }
899 inst.args[i] = enc
900
901 switch {
902 case strings.HasSuffix(enc, " (r)"):
903 action = append(action, "r")
904 enc = strings.TrimSuffix(enc, " (r)")
905 case strings.HasSuffix(enc, " (w)"):
906 action = append(action, "w")
907 enc = strings.TrimSuffix(enc, " (w)")
908 case strings.HasSuffix(enc, " (r, w)"):
909 action = append(action, "rw")
910 enc = strings.TrimSuffix(enc, " (r, w)")
911 case strings.HasPrefix(enc, "imm"), enc == "Offset", enc == "iw", arg == "1", arg == "0", arg == "3":
912 action = append(action, "r")
913 case i < len(opAction[op]):
914 action = append(action, opAction[op][i])
915 default:
916 fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg)
917 action = append(action, "?")
918 }
919
920 if arg == "mem" && op == "LDDQU" {
921 arg = "m128"
922 }
923 if arg == "reg" && op == "LAR" {
924 arg = "r32"
925 }
926 if actual := encodeReplace[[2]string{arg, enc}]; actual != "" {
927 arg = actual
928 }
929
930 if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" {
931 addTag(inst, "modrm_regonly")
932 arg = "rmr" + arg[1:]
933 }
934 if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" {
935 addTag(inst, "modrm_regonly")
936 }
937
938 if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" {
939 addTag(inst, "modrm_memonly")
940 }
941
942 if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") {
943 arg = "rmr64"
944 addTag(inst, "modrm_regonly")
945 }
946 if arg == "CR8" {
947 enc = ""
948 }
949
950 if !encodeOK[[2]string{arg, enc}] {
951 fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc)
952 }
953
954 args[i] = arg
955
956
957
958 if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" {
959 inst.opcode += " /r"
960 }
961 if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") {
962
963 fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode)
964 }
965
966 if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") {
967 inst.opcode += " cw"
968 }
969 if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") {
970 inst.opcode += " cd"
971 }
972 if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") {
973 inst.opcode += " cm"
974 }
975
976 inst.action = strings.Join(action, ",")
977 }
978
979 inst.syntax = joinSyntax(op, args)
980
981
982
983
984 if op == "XCHG" && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") {
985 inst.opcode = "XX " + inst.opcode
986 }
987
988
989 if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" {
990 inst.valid32 = "N.E."
991 }
992
993 if inst.valid32 == "V" {
994 switch {
995 case containsAll(inst.compat, "not supported", "earlier than the Intel486"):
996 inst.cpuid = "486"
997 case containsAll(inst.compat, "not supported", "earlier than the Pentium"),
998 containsAll(inst.compat, "were introduced", "with the Pentium"):
999 inst.cpuid = "Pentium"
1000 case containsAll(inst.compat, "were introduced", "in the Pentium II"):
1001 inst.cpuid = "PentiumII"
1002 case containsAll(inst.compat, "were introduced", "in the P6 family"),
1003 containsAll(inst.compat, "were introduced in P6 family"):
1004 addTag(inst, "P6")
1005 }
1006 }
1007
1008 if onlySomePages {
1009 op, _ := splitSyntax(inst.syntax)
1010 haveOp[op] = true
1011 }
1012 }
1013
1014 insts = out
1015 sort.Sort(byOpcode(insts))
1016
1017
1018 var last *instruction
1019 for _, inst := range insts {
1020 if last != nil {
1021 f1, _ := splitOpcode(last.opcode)
1022 f2, _ := splitOpcode(inst.opcode)
1023 if f1 == f2 {
1024
1025 if is16vs32pair(last, inst) {
1026 addTag(last, "operand16")
1027 addTag(inst, "operand32")
1028 continue
1029 }
1030 if is16vs32pair(inst, last) {
1031 addTag(last, "operand32")
1032 addTag(inst, "operand16")
1033 last = inst
1034 continue
1035 }
1036 }
1037 }
1038 last = inst
1039 }
1040
1041
1042 seen := map[string]*instruction{}
1043 for _, inst := range insts {
1044 if strings.HasPrefix(inst.opcode, "9B ") {
1045 addTag(inst, "pseudo")
1046 continue
1047 }
1048 if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" {
1049 addTag(inst, "pseudo")
1050 continue
1051 }
1052 if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") {
1053 addTag(inst, "pseudo")
1054 continue
1055 }
1056 if strings.HasPrefix(inst.syntax, "SAL ") {
1057 addTag(inst, "pseudo")
1058 continue
1059 }
1060 if old := seen[inst.opcode]; old != nil {
1061 if condLess(old.syntax, inst.syntax) {
1062 addTag(inst, "pseudo")
1063 continue
1064 }
1065 if xchgLess(inst.syntax, old.syntax) {
1066 old.tags = append(old.tags, "pseudo")
1067 seen[inst.opcode] = inst
1068 continue
1069 }
1070 }
1071
1072 seen[inst.opcode] = inst
1073
1074 if last != nil && canGenerate(last.opcode, inst.opcode) {
1075 addTag(inst, "pseudo")
1076 continue
1077 }
1078 last = inst
1079 }
1080 for _, inst := range insts {
1081 if strings.Contains(inst.opcode, "REX ") {
1082 if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax {
1083 addTag(inst, "pseudo64")
1084 continue
1085 } else if old != nil && hasTag(old, "pseudo") {
1086 addTag(inst, "pseudo")
1087 continue
1088 }
1089 }
1090 if strings.Contains(inst.opcode, "REX.W ") {
1091 if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax {
1092 addTag(old, "ignoreREXW")
1093 addTag(inst, "pseudo")
1094 continue
1095 } else if old != nil && hasTag(old, "pseudo") {
1096 addTag(inst, "pseudo")
1097 continue
1098 } else if old != nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") {
1099
1100
1101 addTag(old, "operand16")
1102 addTag(old, "operand32")
1103 continue
1104 }
1105 }
1106 }
1107
1108
1109 for _, inst := range insts {
1110 if strings.HasPrefix(inst.opcode, "XX ") {
1111 inst.opcode = strings.TrimPrefix(inst.opcode, "XX ")
1112 addTag(inst, "pseudo")
1113 removeTag(inst, "pseudo64")
1114 }
1115 }
1116
1117
1118
1119 for _, inst := range insts {
1120 for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] {
1121 fix(inst)
1122 }
1123 sort.Strings(inst.tags)
1124 }
1125
1126 sort.Sort(bySeq(insts))
1127
1128 if onlySomePages {
1129 for _, inst := range extraInsts {
1130 op, _ := splitSyntax(inst.syntax)
1131 if haveOp[op] {
1132 insts = append(insts, inst)
1133 }
1134 }
1135 } else {
1136 insts = append(insts, extraInsts...)
1137 }
1138 return insts
1139 }
1140
1141 func hasTag(inst *instruction, tag string) bool {
1142 for _, t := range inst.tags {
1143 if t == tag {
1144 return true
1145 }
1146 }
1147 return false
1148 }
1149
1150 func removeTag(inst *instruction, tag string) {
1151 if !hasTag(inst, tag) {
1152 return
1153 }
1154 out := inst.tags[:0]
1155 for _, t := range inst.tags {
1156 if t != tag {
1157 out = append(out, t)
1158 }
1159 }
1160 inst.tags = out
1161 }
1162
1163 func addTag(inst *instruction, tag string) {
1164 if !hasTag(inst, tag) {
1165 inst.tags = append(inst.tags, tag)
1166 }
1167 }
1168
1169 type byOpcode []*instruction
1170
1171 func (x byOpcode) Len() int { return len(x) }
1172 func (x byOpcode) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
1173 func (x byOpcode) Less(i, j int) bool {
1174 if x[i].opcode != x[j].opcode {
1175 return opcodeLess(x[i].opcode, x[j].opcode)
1176 }
1177 if condLess(x[i].syntax, x[j].syntax) {
1178 return true
1179 }
1180 if condLess(x[j].syntax, x[i].syntax) {
1181 return false
1182 }
1183 if x[i].syntax != x[j].syntax {
1184 return x[i].syntax < x[j].syntax
1185 }
1186 return x[i].seq < x[j].seq
1187 }
1188
1189 type bySeq []*instruction
1190
1191 func (x bySeq) Len() int { return len(x) }
1192 func (x bySeq) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
1193 func (x bySeq) Less(i, j int) bool {
1194 return x[i].seq < x[j].seq
1195 }
1196
1197 type bySyntax []*instruction
1198
1199 func (x bySyntax) Len() int { return len(x) }
1200 func (x bySyntax) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
1201 func (x bySyntax) Less(i, j int) bool {
1202 if x[i].syntax != x[j].syntax {
1203 return x[i].syntax < x[j].syntax
1204 }
1205 return x[i].opcode < x[j].opcode
1206 }
1207
1208
1209
1210
1211
1212 func condLess(x, y string) bool {
1213 x, _ = splitSyntax(x)
1214 y, _ = splitSyntax(y)
1215 for _, pref := range condPrefs {
1216 if strings.HasSuffix(x, pref[0]) && strings.HasSuffix(y, pref[1]) && strings.TrimSuffix(x, pref[0]) == strings.TrimSuffix(y, pref[1]) {
1217 return true
1218 }
1219 }
1220 return false
1221 }
1222
1223
// xchgLess reports whether x is an XCHG syntax (it starts with "XCHG ")
// that compares strictly greater than y as a string.
func xchgLess(x, y string) bool {
	if !strings.HasPrefix(x, "XCHG ") {
		return false
	}
	return x > y
}
1227
1228
1229
// opcodeLess is the ordering used to sort opcode strings.
// The strings are compared byte by byte; at the first differing position:
//   - 'w' sorts before 'd';
//   - otherwise the bytes are compared after subtracting 'a' with unsigned
//     wrap-around, which places lower-case letters ahead of digits and
//     upper-case letters.
//
// If one string is a proper prefix of the other, the shorter one sorts first.
func opcodeLess(x, y string) bool {
	shared := len(x)
	if len(y) < shared {
		shared = len(y)
	}
	for i := 0; i < shared; i++ {
		cx, cy := x[i], y[i]
		if cx == cy {
			continue
		}
		switch {
		case cx == 'w' && cy == 'd':
			return true
		case cx == 'd' && cy == 'w':
			return false
		}
		// Byte subtraction wraps, so anything below 'a' becomes a large value.
		return cx-'a' < cy-'a'
	}
	return len(x) < len(y)
}
1253
1254
1255
// splitOpcode divides the opcode string x into a leading fixed part and
// the remaining variable part.
//
// The fixed part extends over digits, upper-case ASCII letters, spaces,
// '.' and '+', plus any '/' together with the single byte that follows it.
// It ends at the first other byte (or at a '/' that is the last byte).
// fixed is returned with surrounding white space trimmed; variable is the
// untouched remainder of x.
func splitOpcode(x string) (fixed, variable string) {
	cut := len(x)
	for pos := 0; pos < len(x); {
		c := x[pos]
		switch {
		case '0' <= c && c <= '9', 'A' <= c && c <= 'Z', c == ' ', c == '.', c == '+':
			pos++
			continue
		case c == '/' && pos+2 <= len(x):
			// Keep the slash and its one-byte operand (e.g. "/r", "/0") together.
			pos += 2
			continue
		}
		cut = pos
		break
	}
	return strings.TrimSpace(x[:cut]), x[cut:]
}
1272
1273
1274
1275
// canGenerate reports whether the opcode template x can produce the
// concrete opcode y. Only templates ending in one of three forms are
// recognized:
//
//	"ib"   y replaces it with exactly two upper-case hex digits
//	"0+i"  y replaces it with one digit in '0'..'7'
//	"8+i"  y replaces it with one digit in '8'..'9' or 'A'..'F'
//
// Everything in x before that ending must appear unchanged at the start
// of y. Any other x yields false.
//
// The template is located as a suffix of x rather than by scanning for the
// first byte where x and y differ: with such a scan, an index-0 expansion
// such as "D8 C0" for "D8 C0+i" shares the template's leading digit with x
// and would be rejected by mistake.
func canGenerate(x, y string) bool {
	var tmpl string
	switch {
	case strings.HasSuffix(x, "ib"):
		tmpl = "ib"
	case strings.HasSuffix(x, "0+i"):
		tmpl = "0+i"
	case strings.HasSuffix(x, "8+i"):
		tmpl = "8+i"
	default:
		return false
	}

	base := x[:len(x)-len(tmpl)]
	if !strings.HasPrefix(y, base) {
		return false
	}
	tail := y[len(base):]

	switch tmpl {
	case "ib":
		return len(tail) == 2 && allHex(tail)
	case "0+i":
		return len(tail) == 1 && '0' <= tail[0] && tail[0] <= '7'
	default: // "8+i"
		return len(tail) == 1 && (tail[0] == '8' || tail[0] == '9' || 'A' <= tail[0] && tail[0] <= 'F')
	}
}

// allHex reports whether every character of s is an upper-case hexadecimal
// digit ('0'..'9' or 'A'..'F'). The empty string yields true.
func allHex(s string) bool {
	for _, c := range s {
		isDigit := '0' <= c && c <= '9'
		isUpperHex := 'A' <= c && c <= 'F'
		if !isDigit && !isUpperHex {
			return false
		}
	}
	return true
}
1305
1306
1307
1308 func is16vs32pair(x, y *instruction) bool {
1309 return conv16.Replace(x.syntax) == y.syntax ||
1310 strings.Replace(x.syntax, "r16, r/", "r32, r32/", -1) == y.syntax ||
1311 strings.Replace(x.syntax, "r16", "r32", 1) == y.syntax
1312 }
1313
// containsAll reports whether every string in targ occurs in x, in the
// given order and without overlap: each target is searched for only in the
// text that follows the previous target's first match. With no targets the
// result is true.
func containsAll(x string, targ ...string) bool {
	for k := 0; k < len(targ); k++ {
		at := strings.Index(x, targ[k])
		if at == -1 {
			return false
		}
		// Continue searching strictly after this match.
		x = x[at+len(targ[k]):]
	}
	return true
}
1324
View as plain text