1
2
3
4
5 package main
6
7 import (
8 "bytes"
9 "errors"
10 "fmt"
11 "io"
12 "log"
13 "math"
14 "os"
15 "reflect"
16 "regexp"
17 "sort"
18 "strconv"
19 "strings"
20 "time"
21
22 "rsc.io/pdf"
23 )
24
25
26
// listing accumulates what was scraped from the manual for one instruction
// headline. parsePage produces one per page; merge folds continuation pages
// into the listing that started the instruction.
type listing struct {
	pageNum int    // page the listing was parsed from
	name    string // instruction headline; empty when the page has none
	// mtables holds mnemonic tables and enctables holds operand-encoding
	// tables, each table being rows of cell strings.
	mtables, enctables [][][]string
	compat             string // compatibility text gathered by findCompat
}
34
// logReaderAt is an io.ReaderAt wrapper that logs every read request
// before passing it on to the wrapped reader.
type logReaderAt struct {
	f io.ReaderAt
}

var _ io.ReaderAt = (*logReaderAt)(nil)

// ReadAt logs the requested length and offset through the standard logger,
// then returns whatever the wrapped reader returns for the same request.
func (l *logReaderAt) ReadAt(x []byte, off int64) (int, error) {
	log.Printf("read %d @ %d", len(x), off)
	n, err := l.f.ReadAt(x, off)
	return n, err
}
43
const (
	// cacheBlockSize is the size in bytes of one cache block. The offset
	// arithmetic below masks with cacheBlockSize-1, so it must be a power of two.
	cacheBlockSize = 64 * 1024
	// numCacheBlock is the number of blocks in the cache list. readBlock
	// unlinks the last block through its predecessor, so it must be at least 2.
	numCacheBlock = 16
)

// cachedReaderAt is an io.ReaderAt that serves small reads from a fixed
// number of block-aligned buffers filled from r.
type cachedReaderAt struct {
	r     io.ReaderAt
	cache *cacheBlock // head of the block list; blocks are moved to the front when used
}

// cacheBlock is one node of the cache list.
type cacheBlock struct {
	next   *cacheBlock
	buf    []byte // bytes read at offset; nil until the block is first filled
	offset int64  // block-aligned offset that buf was read from
	err    error  // error returned by the read that filled buf
}

// newCachedReaderAt returns a cachedReaderAt reading from r, with
// numCacheBlock empty blocks already linked into its cache list.
func newCachedReaderAt(r io.ReaderAt) *cachedReaderAt {
	c := &cachedReaderAt{
		r: r,
	}
	for i := 0; i < numCacheBlock; i++ {
		c.cache = &cacheBlock{next: c.cache}
	}
	return c
}

// ReadAt implements io.ReaderAt. Requests of cacheBlockSize bytes or more
// go straight to the underlying reader; smaller requests are assembled from
// cached blocks. It returns the number of bytes copied into p and, when p
// could not be filled, the error recorded for the block where data ran out.
func (c *cachedReaderAt) ReadAt(p []byte, offset int64) (n int, err error) {
	if len(p) >= cacheBlockSize {
		return c.r.ReadAt(p, offset)
	}

	for n < len(p) {
		o := offset + int64(n)
		f := o & (cacheBlockSize - 1)
		b := c.readBlock(o - f)

		// A block that hit EOF or an error is shorter than cacheBlockSize,
		// so f may lie beyond its data. Slicing b.buf[f:] would then panic;
		// treat it as "nothing available here" instead.
		copied := 0
		if f < int64(len(b.buf)) {
			copied = copy(p[n:], b.buf[f:])
			n += copied
		}
		if n == len(p) {
			break
		}
		if b.err != nil {
			return n, b.err
		}
		if copied == 0 {
			// Short block without an error: the underlying reader broke
			// the io.ReaderAt contract. Report it rather than spin forever.
			return n, errShortRead
		}
	}
	return n, nil
}

// errShortRead is returned by ReadAt when the underlying reader delivers
// fewer bytes than requested without reporting an error.
var errShortRead = errors.New("short read")

// readBlock returns the cache block for the given block-aligned offset,
// reading it from the underlying reader if it is not cached. It panics if
// offset is not a multiple of cacheBlockSize.
func (c *cachedReaderAt) readBlock(offset int64) *cacheBlock {
	if offset&(cacheBlockSize-1) != 0 {
		panic("misuse of cachedReaderAt.readBlock")
	}

	// Look in the cache.
	var b, prev *cacheBlock
	for b = c.cache; ; prev, b = b, b.next {
		if b.buf != nil && b.offset == offset {
			// Hit: move the block to the front of the list.
			if prev != nil {
				prev.next = b.next
				b.next = c.cache
				c.cache = b
			}
			return b
		}
		if b.next == nil {
			break
		}
	}

	// Miss: b is now the last block in the list and prev its predecessor.
	// Reuse b for the new offset.
	if b.buf == nil {
		b.buf = make([]byte, cacheBlockSize)
	}
	b.offset = offset
	n, err := c.r.ReadAt(b.buf[:cacheBlockSize], offset)
	b.buf = b.buf[:n]
	b.err = err
	if n > 0 {
		// Only blocks that actually hold data are moved to the front.
		prev.next = nil
		b.next = c.cache
		c.cache = b
	}
	return b
}
129
130 func pdfOpen(name string) (*pdf.Reader, error) {
131 f, err := os.Open(name)
132 if err != nil {
133 return nil, err
134 }
135 fi, err := f.Stat()
136 if err != nil {
137 f.Close()
138 return nil, err
139 }
140 return pdf.NewReader(newCachedReaderAt(f), fi.Size())
141 }
142
143 func parse() []*instruction {
144 var insts []*instruction
145
146 f, err := pdfOpen(*flagFile)
147 if err != nil {
148 log.Fatal(err)
149 }
150
151
152 instList := instHeadings(f.Outline())
153 if len(instList) < 200 {
154 log.Fatalf("only found %d instructions in table of contents", len(instList))
155 }
156
157
158
159 n := f.NumPage()
160 var current *listing
161 finishInstruction := func() {
162 if current == nil {
163 return
164 }
165 if len(current.mtables) == 0 || len(current.mtables[0]) <= 1 {
166 fmt.Fprintf(os.Stderr, "p.%d: no mnemonics for instruction %q\n", current.pageNum, current.name)
167 }
168 processListing(current, &insts)
169 current = nil
170 }
171
172 for pageNum := 1; pageNum <= n; pageNum++ {
173 if onlySomePages && !isDebugPage(pageNum) {
174 continue
175 }
176 p := f.Page(pageNum)
177 parsed := parsePage(p, pageNum)
178 if parsed.name != "" {
179 finishInstruction()
180 for j, headline := range instList {
181 if parsed.name == headline {
182 instList[j] = ""
183 current = parsed
184 break
185 }
186 }
187 if current == nil {
188 fmt.Fprintf(os.Stderr, "p.%d: unexpected instruction %q\n", pageNum, parsed.name)
189 }
190 continue
191 }
192 if current != nil {
193 merge(current, parsed)
194 continue
195 }
196 if parsed.mtables != nil {
197 fmt.Fprintf(os.Stderr, "p.%d: unexpected mnemonic table\n", pageNum)
198 }
199 if parsed.enctables != nil {
200 fmt.Fprintf(os.Stderr, "p.%d: unexpected encoding table\n", pageNum)
201 }
202 if parsed.compat != "" {
203 fmt.Fprintf(os.Stderr, "p.%d: unexpected compatibility statement\n", pageNum)
204 }
205 }
206 finishInstruction()
207
208 if !onlySomePages {
209 for _, headline := range instList {
210 if headline != "" {
211 fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
212 }
213 }
214 }
215
216 return insts
217 }
218
219
220
221
222 func isDebugPage(n int) bool {
223 s := *flagDebugPage
224 var k int
225 for i := 0; ; i++ {
226 if i == len(s) || s[i] == ',' {
227 if n == k {
228 return true
229 }
230 k = 0
231 }
232 if i == len(s) {
233 break
234 }
235 if '0' <= s[i] && s[i] <= '9' {
236 k = k*10 + int(s[i]) - '0'
237 }
238 }
239 return false
240 }
241
242
243 func merge(x, y *listing) {
244 if y.name != "" {
245 fmt.Fprintf(os.Stderr, "p.%d: merging page incorrectly\n", y.pageNum)
246 return
247 }
248
249 x.mtables = append(x.mtables, y.mtables...)
250 x.enctables = append(x.enctables, y.enctables...)
251 x.compat += y.compat
252 }
253
254
255
256 func instHeadings(outline pdf.Outline) []string {
257 return appendInstHeadings(outline, nil)
258 }
259
// instRE matches the titles of outline entries whose direct children are
// taken as instruction headlines by appendInstHeadings.
var instRE = regexp.MustCompile(`\d Instructions \([A-Z]-[A-Z]\)|VMX Instructions|Instruction SET Reference|SHA Extensions Reference`)
261
262
// fixDash normalizes headline text so that outline titles and page
// headlines can be compared: a handful of specific "Compute ..." titles are
// rewritten to one spelling, and em dashes, en dashes and hyphens, with or
// without surrounding spaces, become a bare "-". The replacer tries pairs in
// the order listed, so the longer, more specific patterns must stay ahead of
// the shorter ones.
var fixDash = strings.NewReplacer(
	"Compute 2 –1", "Compute 2^x-1",
	"Compute 2x-1", "Compute 2^x-1",
	"Compute 2x–1", "Compute 2^x-1",
	"/ FUCOMI", "/FUCOMI",
	"Compute y ∗ log x", "Compute y * log₂x",
	"Compute y * log2x", "Compute y * log₂x",
	"Compute y * log2(x +1)", "Compute y * log₂(x+1)",
	"Compute y ∗ log (x +1)", "Compute y * log₂(x+1)",
	" — ", "-",
	"— ", "-",
	" —", "-",
	"—", "-",
	" – ", "-",
	" –", "-",
	"– ", "-",
	"–", "-",
	" - ", "-",
	"- ", "-",
	" -", "-",
)
284
285 func appendInstHeadings(outline pdf.Outline, list []string) []string {
286 if instRE.MatchString(outline.Title) {
287 for _, child := range outline.Child {
288 list = append(list, fixDash.Replace(child.Title))
289 }
290 }
291 for _, child := range outline.Child {
292 list = appendInstHeadings(child, list)
293 }
294 return list
295 }
296
// dateRE matches an English month name followed by a four-digit year
// beginning with 19 or 20. parsePage uses it on page 1 to find the manual's date.
var dateRE = regexp.MustCompile(`\b(January|February|March|April|May|June|July|August|September|October|November|December) ((19|20)[0-9][0-9])\b`)
298
299
300 func parsePage(p pdf.Page, pageNum int) *listing {
301 if debugging {
302 fmt.Fprintf(os.Stderr, "DEBUG: parsing page %d\n", pageNum)
303 }
304
305 parsed := new(listing)
306 parsed.pageNum = pageNum
307
308 content := p.Content()
309
310 for i, t := range content.Text {
311 if match(t, "Symbol", 11, "≠") {
312 t.Font = "NeoSansIntel"
313 t.FontSize = 9
314 content.Text[i] = t
315 }
316 if t.S == "*" || t.S == "**" || t.S == "***" || t.S == "," && t.Font == "Arial" && t.FontSize < 9 || t.S == "1" && t.Font == "Arial" {
317 t.Font = "NeoSansIntel"
318 t.FontSize = 9
319 if i+1 < len(content.Text) {
320 t.Y = content.Text[i+1].Y
321 }
322 content.Text[i] = t
323 }
324 }
325
326 text := findWords(content.Text)
327
328 for i, t := range text {
329 if match(t, "NeoSansIntel", 8, ".WIG") || match(t, "NeoSansIntel", 8, "AVX2") {
330 t.FontSize = 9
331 text[i] = t
332 }
333 if t.Font == "NeoSansIntel-Medium" {
334 t.Font = "NeoSansIntelMedium"
335 text[i] = t
336 }
337 if t.Font == "NeoSansIntel-Italic" {
338 t.Font = "NeoSansIntel,Italic"
339 text[i] = t
340 }
341 }
342
343 if debugging {
344 for _, t := range text {
345 fmt.Println(t)
346 }
347 }
348
349 if pageNum == 1 {
350 var buf bytes.Buffer
351 for _, t := range text {
352 buf.WriteString(t.S + "\n")
353 }
354 all := buf.String()
355 m := regexp.MustCompile(`Order Number: ([\w-\-]+)`).FindStringSubmatch(all)
356 num := "???"
357 if m != nil {
358 num = m[1]
359 }
360 date := dateRE.FindString(all)
361 if date == "" {
362 date = "???"
363 }
364
365 fmt.Printf("# x86 instruction set description version %s, %s\n",
366 specFormatVersion, time.Now().Format("2006-01-02"))
367 fmt.Printf("# Based on Intel Instruction Set Reference #%s, %s.\n", num, date)
368 fmt.Printf("# https://golang.org/x/arch/x86/x86spec\n")
369 }
370
371
372 out := text[:0]
373 for _, t := range text {
374 if shouldIgnore(t) {
375 continue
376 }
377 out = append(out, t)
378 }
379 text = out
380
381
382 if len(text) == 0 {
383 return parsed
384 }
385 if (!match(text[0], "NeoSansIntel", 9, "INSTRUCTION") || !match(text[0], "NeoSansIntel", 9, "REFERENCE")) &&
386 !match(text[0], "NeoSansIntel", 9, "EXTENSIONS") {
387 return parsed
388 }
389 text = text[1:]
390
391 enctable := findEncodingTable(text)
392 if enctable != nil {
393 parsed.enctables = append(parsed.enctables, enctable)
394 }
395
396 parsed.compat = findCompat(text)
397
398
399
400
401 if len(text) == 0 || !match(text[0], "NeoSansIntelMedium", 12, "") || !isInstHeadline(text[0].S) {
402 if debugging {
403 fmt.Fprintf(os.Stderr, "non-inst-headline: %v\n", text[0])
404 }
405 } else {
406 parsed.name = text[0].S
407 text = text[1:]
408 for len(text) > 0 && match(text[0], "NeoSansIntelMedium", 12, "") {
409 parsed.name += " " + text[0].S
410 text = text[1:]
411 }
412 parsed.name = fixDash.Replace(parsed.name)
413 }
414
415
416 i := 0
417 for i < len(text) && match(text[i], "NeoSansIntelMedium", 9, "") {
418 i++
419 }
420 for i < len(text) && match(text[i], "NeoSansIntel", 9, "") && text[i].S != "NOTES:" {
421 i++
422 }
423
424 mtable := findMnemonicTable(text[:i])
425 if mtable != nil {
426 parsed.mtables = append(parsed.mtables, mtable)
427 }
428
429 return parsed
430 }
431
432 func match(t pdf.Text, font string, size float64, substr string) bool {
433 return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr)
434 }
435
436 func shouldIgnore(t pdf.Text) bool {
437
438
439 if (t.S == "*" || t.S == "\\") && strings.HasPrefix(t.Font, "Arial") {
440 return true
441 }
442
443
444 if len(t.S) == 1 && '1' <= t.S[0] && t.S[0] <= '9' || t.S == "ST(0)" || t.S == "x" {
445 if match(t, "NeoSansIntel", 7.2, "") || match(t, "NeoSansIntel", 5.6, "") || match(t, "NeoSansIntelMedium", 8, "") || match(t, "NeoSansIntelMedium", 9.6, "") {
446 return true
447 }
448 }
449
450 return false
451 }
452
// isInstHeadline reports whether s looks like an instruction headline: it
// contains an em dash, a hyphen with a space on both sides, or the literal
// text "PTEST- Logical Compare".
func isInstHeadline(s string) bool {
	for _, marker := range [...]string{"—", " - ", "PTEST- Logical Compare"} {
		if strings.Contains(s, marker) {
			return true
		}
	}
	return false
}
458
459 func findWords(chars []pdf.Text) (words []pdf.Text) {
460
461 const nudge = 1
462 sort.Sort(pdf.TextVertical(chars))
463 old := -100000.0
464 for i, c := range chars {
465 if c.Y != old && math.Abs(old-c.Y) < nudge {
466 chars[i].Y = old
467 } else {
468 old = c.Y
469 }
470 }
471
472
473
474 sort.Sort(pdf.TextVertical(chars))
475
476
477 for i := 0; i < len(chars); {
478
479 j := i + 1
480 for j < len(chars) && chars[j].Y == chars[i].Y {
481 j++
482 }
483 var end float64
484
485 for k := i; k < j; {
486 ck := &chars[k]
487 s := ck.S
488 end = ck.X + ck.W
489 charSpace := ck.FontSize / 6
490 wordSpace := ck.FontSize * 2 / 3
491 l := k + 1
492 for l < j {
493
494 cl := &chars[l]
495 if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+charSpace {
496 s += cl.S
497 end = cl.X + cl.W
498 l++
499 continue
500 }
501
502 if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+wordSpace {
503 s += " " + cl.S
504 end = cl.X + cl.W
505 l++
506 continue
507 }
508 break
509 }
510 f := ck.Font
511 f = strings.TrimSuffix(f, ",Italic")
512 f = strings.TrimSuffix(f, "-Italic")
513 words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end, s})
514 k = l
515 }
516 i = j
517 }
518
519 return words
520 }
521
// sameFont reports whether text in fonts f1 and f2 may be joined into one
// word. Fonts are the same when their names are equal after removing a
// trailing ",Italic" or "-Italic"; in addition "Symbol" and "TimesNewRoman"
// are compatible with every font.
//
// The second pair of trims used to be applied to f1 and assigned to f2,
// which made the name comparison always succeed and the function always
// return true.
func sameFont(f1, f2 string) bool {
	f1 = strings.TrimSuffix(f1, ",Italic")
	f1 = strings.TrimSuffix(f1, "-Italic")
	f2 = strings.TrimSuffix(f2, ",Italic")
	f2 = strings.TrimSuffix(f2, "-Italic")
	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
}
529
530 func findMnemonicTable(text []pdf.Text) [][]string {
531 sort.Sort(pdf.TextHorizontal(text))
532
533 const nudge = 1
534
535 old := -100000.0
536 var col []float64
537 for i, t := range text {
538 if t.Font != "NeoSansIntelMedium" {
539 continue
540 }
541 if t.X != old && math.Abs(old-t.X) < nudge {
542 text[i].X = old
543 } else if t.X != old {
544 old = t.X
545 col = append(col, old)
546 }
547 }
548 sort.Sort(pdf.TextVertical(text))
549
550 if len(col) == 0 {
551 return nil
552 }
553
554 y := -100000.0
555 var table [][]string
556 var line []string
557 bold := -1
558 for _, t := range text {
559 if t.Y != y {
560 table = append(table, make([]string, len(col)))
561 line = table[len(table)-1]
562 y = t.Y
563 if t.Font == "NeoSansIntelMedium" {
564 bold = len(table) - 1
565 }
566 }
567 i := 0
568 for i+1 < len(col) && col[i+1] <= t.X+nudge {
569 i++
570 }
571 if line[i] != "" {
572 line[i] += " "
573 }
574 line[i] += t.S
575 }
576
577 var mtable [][]string
578 for i, t := range table {
579 if 0 < i && i <= bold || bold < i && halfMissing(t) {
580
581 last := mtable[len(mtable)-1]
582 for j, s := range t {
583 if s != "" {
584 last[j] += "\n" + s
585 }
586 }
587 } else {
588 mtable = append(mtable, t)
589 }
590 }
591
592 if bold >= 0 {
593 heading := mtable[0]
594 for i, x := range heading {
595 heading[i] = fixHeading.Replace(x)
596 }
597 }
598
599 return mtable
600 }
601
// fixHeading rewrites the column headings assembled by findMnemonicTable,
// which may be split over several lines or carry footnote stars, into the
// single-line names that processListing switches on. The replacer tries
// pairs in the order listed, so longer variants must stay ahead of their
// prefixes (for example "Opcode***" before "Opcode*").
var fixHeading = strings.NewReplacer(
	"64/32-\nbit\nMode", "64/32-Bit Mode",
	"64/32-\nbit Mode", "64/32-Bit Mode",
	"64/32-bit\nMode", "64/32-Bit Mode",
	"64/3\n2-bit\nMode", "64/32-Bit Mode",
	"64/32 bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32\n-bit\nMode", "64/32-Bit Mode",
	"64/32\nbit Mode\nSupport", "64/32-Bit Mode",
	"64-Bit\nMode", "64-Bit Mode",
	"64-bit\nMode", "64-Bit Mode",

	"Op/ En", "Op/En",
	"Op/\nEn", "Op/En",
	"Op/\nEN", "Op/En",
	"Op /\nEn", "Op/En",
	"Opcode***", "Opcode",
	"Opcode**", "Opcode",
	"Opcode*", "Opcode",
	"/\nInstruction", "/Instruction",

	"CPUID Fea-\nture Flag", "CPUID Feature Flag",
	"CPUID\nFeature\nFlag", "CPUID Feature Flag",
	"CPUID\nFeature Flag", "CPUID Feature Flag",
	"CPUIDFeature\nFlag", "CPUID Feature Flag",

	"Compat/\nLeg Mode*", "Compat/Leg Mode",
	"Compat/\nLeg Mode", "Compat/Leg Mode",
	"Compat/ *\nLeg Mode", "Compat/Leg Mode",
)
632
// halfMissing reports whether the number of empty strings in x is at least
// len(x)/2, using integer division. It is therefore true for an empty or
// single-element slice.
func halfMissing(x []string) bool {
	empty := 0
	for i := range x {
		if len(x[i]) == 0 {
			empty++
		}
	}
	return len(x)/2 <= empty
}
642
643 func findEncodingTable(text []pdf.Text) [][]string {
644
645 sort.Sort(pdf.TextVertical(text))
646 var col []float64
647 sawTitle := false
648
649 center := func(t pdf.Text) float64 {
650 return t.X + t.W/2
651 }
652
653 start := 0
654 end := len(text)
655 for i, t := range text {
656 if match(t, "NeoSansIntelMedium", 10, "Instruction Operand Encoding") {
657 sawTitle = true
658 start = i + 1
659 continue
660 }
661 if !sawTitle {
662 continue
663 }
664 if match(t, "NeoSansIntel", 9, "Op/En") || match(t, "NeoSansIntel", 9, "Operand") {
665 if debugging {
666 fmt.Printf("column %d at %.2f: %v\n", len(col), center(t), t)
667 }
668 col = append(col, center(t))
669 }
670 if match(t, "NeoSansIntelMedium", 10, "Description") {
671 end = i
672 break
673 }
674 }
675 text = text[start:end]
676
677 if len(col) == 0 {
678 return nil
679 }
680
681 const nudge = 20
682
683 y := -100000.0
684 var table [][]string
685 var line []string
686 for _, t := range text {
687 if t.Y != y {
688 table = append(table, make([]string, len(col)))
689 line = table[len(table)-1]
690 y = t.Y
691 }
692 i := 0
693 x := center(t)
694 for i+1 < len(col) && col[i+1] <= x+nudge {
695 i++
696 }
697 if debugging {
698 fmt.Printf("text at %.2f: %v => %d\n", x, t, i)
699 }
700 if line[i] != "" {
701 line[i] += " "
702 }
703 line[i] += t.S
704 }
705
706 out := table[:0]
707 for _, line := range table {
708 if strings.HasPrefix(line[len(line)-1], "Vol. 2") {
709 continue
710 }
711 if line[0] == "" && len(out) > 0 {
712 last := out[len(out)-1]
713 for i, col := range line {
714 if col != "" {
715 last[i] += " " + col
716 }
717 }
718 continue
719 }
720 out = append(out, line)
721 }
722 table = out
723
724 return table
725 }
726
727 func findCompat(text []pdf.Text) string {
728 sort.Sort(pdf.TextVertical(text))
729
730 inCompat := false
731 out := ""
732 for _, t := range text {
733 if match(t, "NeoSansIntelMedium", 10, "") {
734 inCompat = strings.Contains(t.S, "Architecture Compatibility")
735 if inCompat {
736 out += t.S + "\n"
737 }
738 }
739 if inCompat && match(t, "Verdana", 9, "") || strings.Contains(t.S, "were introduced") {
740 out += t.S + "\n"
741 }
742 }
743 return out
744 }
745
// processListing converts the tables collected in listing p into
// instruction values and appends them to *insts. Every data row of every
// mnemonic table becomes one instruction unless its final syntax is in
// instBlacklist. If a table has an unknown column heading or a malformed
// cell, processing of the whole listing stops at that point, and a
// diagnostic plus a dump of all mnemonic tables is written to standard
// error; instructions appended before the bad row remain in *insts.
func processListing(p *listing, insts *[]*instruction) {
	if debugging {
		for _, table := range p.mtables {
			fmt.Printf("table:\n")
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("enctable:\n")
		for _, table := range p.enctables {
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("compat:\n%s", p.compat)
	}

	// With the compat flag set, echo the compatibility text as comment lines.
	if *flagCompat && p.compat != "" {
		fmt.Printf("# p.%d: %s\n#\t%s\n", p.pageNum, p.name, strings.Replace(p.compat, "\n", "\n#\t", -1))
	}

	// encs maps an Op/En label to its operand encodings. The first row of
	// each encoding table is skipped as its heading, and trailing "NA",
	// empty and " source" cells are trimmed from every row.
	encs := make(map[string][]string)
	for _, table := range p.enctables {
		for _, row := range table[1:] {
			for len(row) > 1 && (row[len(row)-1] == "NA" || row[len(row)-1] == "" || row[len(row)-1] == " source") {
				row = row[:len(row)-1]
			}
			encs[row[0]] = row[1:]
		}
	}

	// wrong carries the diagnostic for the BadTable exit below.
	var wrong string
	for _, table := range p.mtables {
		heading := table[0]
		for _, row := range table[1:] {
			// Skip rows that merely repeat the heading.
			if row[0] == heading[0] && reflect.DeepEqual(row, heading) {
				continue
			}
			// Special case: one CMOVG row has "V/N.E." in a single cell
			// and "NA" in the next; split it over the two cells.
			if len(row) >= 5 && row[1] == "CMOVG r64, r/m64" && row[3] == "V/N.E." && row[4] == "NA" {
				row[3] = "V"
				row[4] = "N.E."
			}
			inst := new(instruction)
			inst.page = p.pageNum
			inst.compat = strings.Join(strings.Fields(p.compat), " ")
			// Interpret each cell according to its column heading. By
			// default x is the cell text with newlines turned into spaces.
			for i, hdr := range heading {
				x := row[i]
				x = strings.Replace(x, "\n", " ", -1)
				switch strings.TrimSpace(hdr) {
				default:
					wrong = "unexpected header: " + strconv.Quote(hdr)
					goto BadTable
				case "Opcode/Instruction":
					// Combined column: the text before the first newline is
					// the opcode, the rest is the syntax. Start from the raw
					// cell and first undo line breaks known to fall inside
					// the opcode part; the repairs are written back to row.
					x = row[i]
					if strings.HasPrefix(x, "\nVEX") {
						x = x[1:]
						row[i] = x
					}
					if strings.Contains(x, "\n/r ") {
						x = strings.Replace(x, "\n/r ", " /r ", -1)
						row[i] = x
					}
					if strings.Contains(x, ",\nimm") {
						x = strings.Replace(x, ",\nimm", ", imm", -1)
						row[i] = x
					}
					if strings.Count(x, "\n") < 1 {
						wrong = "bad Opcode/Instruction pairing: " + strconv.Quote(x)
						goto BadTable
					}
					// This i shadows the column index for the rest of the case.
					i := strings.Index(x, "\n")
					inst.opcode = x[:i]
					inst.syntax = strings.Replace(x[i+1:], "\n", " ", -1)

				case "Opcode":
					inst.opcode = x

				case "Instruction":
					inst.syntax = x

				case "Op/En":
					inst.args = encs[x]
					// Fall back to encoding "A" when it is the only one known.
					if inst.args == nil && len(encs) == 1 && encs["A"] != nil {
						inst.args = encs["A"]
					}

					// Special case for PREFETCHW m8: the row names encoding
					// "A" while the only encoding-table entry is "M".
					if inst.args == nil && inst.syntax == "PREFETCHW m8" && x == "A" && len(encs) == 1 && encs["M"] != nil {
						inst.args = encs["M"]
					}

				case "64-Bit Mode":
					// On failure parseMode returns its input, so the shadowed
					// x in the message is the original cell text.
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for 64-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x

				case "Compat/Leg Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for Compat/Leg Mode column: " + x
						goto BadTable
					}
					inst.valid32 = x

				case "64/32-Bit Mode":
					// Two modes in one cell, separated by the first slash.
					i := strings.Index(x, "/")
					if i < 0 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					x1, ok1 := parseMode(x[:i])
					x2, ok2 := parseMode(x[i+1:])
					if !ok1 || !ok2 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x1
					inst.valid32 = x2

				case "CPUID Feature Flag":
					inst.cpuid = x

				case "Description":
					if inst.desc != "" {
						inst.desc += " "
					}
					inst.desc += x
				}
			}

			// Clean up the opcode. The replacements are applied in sequence
			// and later ones act on the output of earlier ones (for example
			// "REX.W+" becomes "REX.W +" and is later reduced to "REX.W"),
			// so their order must be kept.
			if inst.opcode == "VEX.128.66.0F.W0 6E /" {
				inst.opcode += "r"
			}
			fix := func(old, new string) {
				inst.opcode = strings.Replace(inst.opcode, old, new, -1)
			}
			fix(" imm8", " ib")
			fix("REX.w", "REX.W")
			fix("REX.W+", "REX.W +")
			fix(" 0f ", " 0F ")
			fix(". 0F38", ".0F38")
			fix("0F .WIG", "0F.WIG")
			fix("0F38 .WIG", "0F38.WIG")
			fix("NDS .LZ", "NDS.LZ")
			fix("58+ r", "58+r")
			fix("B0+ ", "B0+")
			fix("B8+ ", "B8+")
			fix("40+ ", "40+")
			fix("*", "")
			fix(",", " ")
			fix("/", " /")
			fix("REX.W +", "REX.W")
			fix("REX +", "REX")
			fix("REX 0F BE", "REX.W 0F BE")
			fix("REX 0F B2", "REX.W 0F B2")
			fix("REX 0F B4", "REX.W 0F B4")
			fix("REX 0F B5", "REX.W 0F B5")
			fix("0F38.0", "0F38.W0")
			fix(".660F.", ".66.0F.")
			fix("VEX128", "VEX.128")
			fix("0F3A.W0.1D", "0F3A.W0 1D")

			// Collapse all runs of white space to single spaces.
			inst.opcode = strings.Join(strings.Fields(inst.opcode), " ")

			// Clean up the syntax, then re-join it in canonical form.
			fix = func(old, new string) {
				inst.syntax = strings.Replace(inst.syntax, old, new, -1)
			}
			fix("xmm1 xmm2", "xmm1, xmm2")
			fix("r16/m16", "r/m16")
			fix("r32/m161", "r32/m16")
			fix("r32/m32", "r/m32")
			fix("r64/m64", "r/m64")
			fix("\u2013", "-")
			fix("mm3 /m", "mm3/m")
			fix("mm3/.m", "mm3/m")
			inst.syntax = joinSyntax(splitSyntax(inst.syntax))

			// Clean up the CPUID feature flag.
			fix = func(old, new string) {
				inst.cpuid = strings.Replace(inst.cpuid, old, new, -1)
			}
			fix("PCLMUL- QDQ", "PCLMULQDQ")
			fix("PCL- MULQDQ", "PCLMULQDQ")
			fix("Both PCLMULQDQ and AVX flags", "PCLMULQDQ+AVX")

			if !instBlacklist[inst.syntax] {
				*insts = append(*insts, inst)
			}
		}
	}
	return

	// Error exit: report the problem and dump every mnemonic table of the
	// listing to help locate it.
BadTable:
	fmt.Fprintf(os.Stderr, "p.%d: reading %v: %v\n", p.pageNum, p.name, wrong)
	for _, table := range p.mtables {
		for _, t := range table {
			fmt.Fprintf(os.Stderr, "\t%q\n", t)
		}
	}
	fmt.Fprintf(os.Stderr, "\n")
}
950
// modeAbbrev maps the spellings of a mode-validity cell that parseMode
// accepts to their canonical abbreviation.
var modeAbbrev = map[string]string{
	"Invalid":  "I",
	"Invalid*": "I",
	"Inv.":     "I",
	"I":        "I",
	"i":        "I",

	"Valid":  "V",
	"Valid*": "V",
	"V":      "V",

	"N.E.":  "N.E.",
	"NE":    "N.E.",
	"N. E.": "N.E.",

	"N.P.":  "N.P.",
	"N. P.": "N.P.",

	"N.S.":  "N.S.",
	"N. S.": "N.S.",

	"N.I.":  "N.I.",
	"N. I.": "N.I.",
}

// parseMode converts the text of a mode-validity cell into its canonical
// abbreviation ("I", "V", "N.E.", "N.P.", "N.S." or "N.I."), ignoring
// surrounding white space, and reports true. For unrecognized text it
// returns s unchanged and false.
func parseMode(s string) (string, bool) {
	if mode, ok := modeAbbrev[strings.TrimSpace(s)]; ok {
		return mode, true
	}
	return s, false
}
968
// splitSyntax splits an instruction syntax string into its mnemonic and
// arguments. The mnemonic is the text before the first space; the remainder
// is split at commas, and each argument has surrounding white space and
// then trailing stars removed. Without a space the whole string is the
// mnemonic and args is nil.
func splitSyntax(syntax string) (op string, args []string) {
	sp := strings.IndexByte(syntax, ' ')
	if sp < 0 {
		return syntax, nil
	}
	op = syntax[:sp]
	args = strings.Split(syntax[sp+1:], ",")
	for i := range args {
		args[i] = strings.TrimRight(strings.TrimSpace(args[i]), "*")
	}
	return op, args
}
983
// joinSyntax is the inverse of splitSyntax: it returns op followed by a
// space and the arguments separated by ", ", or just op when there are no
// arguments.
func joinSyntax(op string, args []string) string {
	if len(args) > 0 {
		return op + " " + strings.Join(args, ", ")
	}
	return op
}
990
View as plain text