
Source file src/golang.org/x/arch/x86/x86spec/parse.go

Documentation: golang.org/x/arch/x86/x86spec

     1  // Copyright 2016 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     5  package main
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"log"
    13  	"math"
    14  	"os"
    15  	"reflect"
    16  	"regexp"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  	"time"
    22  	"rsc.io/pdf"
    23  )
// listing holds information about one or more parsed manual pages
// concerning a single instruction listing.
type listing struct {
	pageNum   int          // page number passed to parsePage for the page that created this listing
	name      string       // instruction heading
	mtables   [][][]string // mnemonic tables (at most one per page)
	enctables [][][]string // encoding tables (at most one per page)
	compat    string       // compatibility text returned by findCompat; merge concatenates it across pages
}
    35  type logReaderAt struct {
    36  	f io.ReaderAt
    37  }
    39  func (l *logReaderAt) ReadAt(x []byte, off int64) (int, error) {
    40  	log.Printf("read %d @ %d", len(x), off)
    41  	return l.f.ReadAt(x, off)
    42  }
// Geometry of the block cache used by cachedReaderAt.
const (
	cacheBlockSize = 64 * 1024 // bytes per cached block; readBlock requires offsets that are multiples of it
	numCacheBlock  = 16        // number of blocks newCachedReaderAt links into the cache list
)
// cachedReaderAt is an io.ReaderAt that serves reads smaller than
// cacheBlockSize out of a fixed list of cached blocks read from r.
type cachedReaderAt struct {
	r     io.ReaderAt // underlying reader
	cache *cacheBlock // head of the block list; readBlock moves the block it returns to the front
}
// cacheBlock is one entry in the singly linked cache list of a cachedReaderAt.
type cacheBlock struct {
	next   *cacheBlock // following block in the list; nil at the tail
	buf    []byte      // bytes read for this block; nil until the block is first filled
	offset int64       // offset in the underlying reader that buf was read from
	err    error       // error returned by the read that filled buf, if any
}
    61  func newCachedReaderAt(r io.ReaderAt) *cachedReaderAt {
    62  	c := &cachedReaderAt{
    63  		r: r,
    64  	}
    65  	for i := 0; i < numCacheBlock; i++ {
    66  		c.cache = &cacheBlock{next: c.cache}
    67  	}
    68  	return c
    69  }
    71  func (c *cachedReaderAt) ReadAt(p []byte, offset int64) (n int, err error) {
    72  	// Assume large reads indicate a caller that doesn't need caching.
    73  	if len(p) >= cacheBlockSize {
    74  		return c.r.ReadAt(p, offset)
    75  	}
    77  	for n < len(p) {
    78  		o := offset + int64(n)
    79  		f := o & (cacheBlockSize - 1)
    80  		b := c.readBlock(o - f)
    81  		n += copy(p[n:], b.buf[f:])
    82  		if n < len(p) && b.err != nil {
    83  			return n, b.err
    84  		}
    85  	}
    86  	return n, nil
    87  }
// errShortRead is a sentinel error carrying the message "short read".
var errShortRead = errors.New("short read")
// readBlock returns the cache block for the data starting at offset, which
// must be a multiple of cacheBlockSize; any other offset causes a panic.
//
// On a cache hit the block is moved to the front of the list and returned.
// On a miss the last block in the list is refilled by reading from c.r; the
// bytes read and the error from that read are stored in the block, and the
// block is moved to the front if any bytes were read.
func (c *cachedReaderAt) readBlock(offset int64) *cacheBlock {
	if offset&(cacheBlockSize-1) != 0 {
		panic("misuse of cachedReaderAt.readBlock")
	}

	// Look in cache.
	var b, prev *cacheBlock
	for b = c.cache; ; prev, b = b, b.next {
		// Blocks that were never filled have a nil buf, so their zero
		// offset cannot be mistaken for a cached block at offset 0.
		if b.buf != nil && b.offset == offset {
			// Move to front.
			if prev != nil {
				prev.next = b.next
				b.next = c.cache
				c.cache = b
			}
			return b
		}
		// Stop on the last block (rather than running off the end) so
		// that b and prev are still usable below.
		if b.next == nil {
			break
		}
	}

	// Otherwise b is LRU block in cache, prev points at b.
	if b.buf == nil {
		b.buf = make([]byte, cacheBlockSize)
	}
	b.offset = offset
	// Reslice to the full block size: an earlier short read may have
	// left b.buf shorter than cacheBlockSize.
	n, err := c.r.ReadAt(b.buf[:cacheBlockSize], offset)
	b.buf = b.buf[:n]
	b.err = err
	if n > 0 {
		// Move to front.
		// NOTE(review): prev is nil here if the list has only one block;
		// newCachedReaderAt builds numCacheBlock (16) blocks, so this
		// relies on that constant staying above 1.
		prev.next = nil
		b.next = c.cache
		c.cache = b
	}
	return b
}
   130  func pdfOpen(name string) (*pdf.Reader, error) {
   131  	f, err := os.Open(name)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  	fi, err := f.Stat()
   136  	if err != nil {
   137  		f.Close()
   138  		return nil, err
   139  	}
   140  	return pdf.NewReader(newCachedReaderAt(f), fi.Size())
   141  }
   143  func parse() []*instruction {
   144  	var insts []*instruction
   146  	f, err := pdfOpen(*flagFile)
   147  	if err != nil {
   148  		log.Fatal(err)
   149  	}
   151  	// Find instruction set reference in outline, to build instruction list.
   152  	instList := instHeadings(f.Outline())
   153  	if len(instList) < 200 {
   154  		log.Fatalf("only found %d instructions in table of contents", len(instList))
   155  	}
   157  	// Scan document looking for instructions.
   158  	// Must find exactly the ones in the outline.
   159  	n := f.NumPage()
   160  	var current *listing
   161  	finishInstruction := func() {
   162  		if current == nil {
   163  			return
   164  		}
   165  		if len(current.mtables) == 0 || len(current.mtables[0]) <= 1 {
   166  			fmt.Fprintf(os.Stderr, "p.%d: no mnemonics for instruction %q\n", current.pageNum, current.name)
   167  		}
   168  		processListing(current, &insts)
   169  		current = nil
   170  	}
   172  	for pageNum := 1; pageNum <= n; pageNum++ {
   173  		if onlySomePages && !isDebugPage(pageNum) {
   174  			continue
   175  		}
   176  		p := f.Page(pageNum)
   177  		parsed := parsePage(p, pageNum)
   178  		if parsed.name != "" {
   179  			finishInstruction()
   180  			for j, headline := range instList {
   181  				if parsed.name == headline {
   182  					instList[j] = ""
   183  					current = parsed
   184  					break
   185  				}
   186  			}
   187  			if current == nil {
   188  				fmt.Fprintf(os.Stderr, "p.%d: unexpected instruction %q\n", pageNum, parsed.name)
   189  			}
   190  			continue
   191  		}
   192  		if current != nil {
   193  			merge(current, parsed)
   194  			continue
   195  		}
   196  		if parsed.mtables != nil {
   197  			fmt.Fprintf(os.Stderr, "p.%d: unexpected mnemonic table\n", pageNum)
   198  		}
   199  		if parsed.enctables != nil {
   200  			fmt.Fprintf(os.Stderr, "p.%d: unexpected encoding table\n", pageNum)
   201  		}
   202  		if parsed.compat != "" {
   203  			fmt.Fprintf(os.Stderr, "p.%d: unexpected compatibility statement\n", pageNum)
   204  		}
   205  	}
   206  	finishInstruction()
   208  	if !onlySomePages {
   209  		for _, headline := range instList {
   210  			if headline != "" {
   211  				fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
   212  			}
   213  		}
   214  	}
   216  	return insts
   217  }
   219  // isDebugPage reports whether the -debugpage flag mentions page n.
   220  // The argument is a comma-separated list of pages.
   221  // Maybe some day it will support ranges.
   222  func isDebugPage(n int) bool {
   223  	s := *flagDebugPage
   224  	var k int
   225  	for i := 0; ; i++ {
   226  		if i == len(s) || s[i] == ',' {
   227  			if n == k {
   228  				return true
   229  			}
   230  			k = 0
   231  		}
   232  		if i == len(s) {
   233  			break
   234  		}
   235  		if '0' <= s[i] && s[i] <= '9' {
   236  			k = k*10 + int(s[i]) - '0'
   237  		}
   238  	}
   239  	return false
   240  }
   242  // merge merges the content of y into the running collection in x.
   243  func merge(x, y *listing) {
   244  	if y.name != "" {
   245  		fmt.Fprintf(os.Stderr, "p.%d: merging page incorrectly\n", y.pageNum)
   246  		return
   247  	}
   249  	x.mtables = append(x.mtables, y.mtables...)
   250  	x.enctables = append(x.enctables, y.enctables...)
   251  	x.compat += y.compat
   252  }
   254  // instHeadings returns the list of instruction headings from the table of contents.
   255  // When we parse the pages we expect to find every one of these.
   256  func instHeadings(outline pdf.Outline) []string {
   257  	return appendInstHeadings(outline, nil)
   258  }
// instRE matches the titles of outline entries whose direct children
// appendInstHeadings records as instruction headings.
var instRE = regexp.MustCompile(`\d Instructions \([A-Z]-[A-Z]\)|VMX Instructions|Instruction SET Reference|SHA Extensions Reference`)
// The headings are inconsistent about dash and superscript usage. Normalize.
//
// fixDash is applied to both outline titles (appendInstHeadings) and page
// headlines (parsePage) so that the two can be compared as plain strings.
// The specific phrase rewrites come first; the remaining pairs collapse
// em dashes, en dashes and hyphens, with or without surrounding spaces,
// to a bare "-".
var fixDash = strings.NewReplacer(
	"Compute 2 –1", "Compute 2^x-1",
	"Compute 2x-1", "Compute 2^x-1",
	"Compute 2x–1", "Compute 2^x-1",
	"/ FUCOMI", "/FUCOMI",
	"Compute y ∗ log x", "Compute y * log₂x",
	"Compute y * log2x", "Compute y * log₂x",
	"Compute y * log2(x +1)", "Compute y * log₂(x+1)",
	"Compute y ∗ log (x +1)", "Compute y * log₂(x+1)",
	" — ", "-",
	"— ", "-",
	" —", "-",
	"—", "-",
	" – ", "-",
	" –", "-",
	"– ", "-",
	"–", "-",
	" - ", "-",
	"- ", "-",
	" -", "-",
)
   285  func appendInstHeadings(outline pdf.Outline, list []string) []string {
   286  	if instRE.MatchString(outline.Title) {
   287  		for _, child := range outline.Child {
   288  			list = append(list, fixDash.Replace(child.Title))
   289  		}
   290  	}
   291  	for _, child := range outline.Child {
   292  		list = appendInstHeadings(child, list)
   293  	}
   294  	return list
   295  }
// dateRE matches an English month name followed by a four-digit year
// starting with 19 or 20. parsePage uses it to find the manual's date in
// the text of page 1.
var dateRE = regexp.MustCompile(`\b(January|February|March|April|May|June|July|August|September|October|November|December) ((19|20)[0-9][0-9])\b`)
   299  // parsePage parses a single PDF page and returns the content it found.
   300  func parsePage(p pdf.Page, pageNum int) *listing {
   301  	if debugging {
   302  		fmt.Fprintf(os.Stderr, "DEBUG: parsing page %d\n", pageNum)
   303  	}
   305  	parsed := new(listing)
   306  	parsed.pageNum = pageNum
   308  	content := p.Content()
   310  	for i, t := range content.Text {
   311  		if match(t, "Symbol", 11, "≠") {
   312  			t.Font = "NeoSansIntel"
   313  			t.FontSize = 9
   314  			content.Text[i] = t
   315  		}
   316  		if t.S == "*" || t.S == "**" || t.S == "***" || t.S == "," && t.Font == "Arial" && t.FontSize < 9 || t.S == "1" && t.Font == "Arial" {
   317  			t.Font = "NeoSansIntel"
   318  			t.FontSize = 9
   319  			if i+1 < len(content.Text) {
   320  				t.Y = content.Text[i+1].Y
   321  			}
   322  			content.Text[i] = t
   323  		}
   324  	}
   326  	text := findWords(content.Text)
   328  	for i, t := range text {
   329  		if match(t, "NeoSansIntel", 8, ".WIG") || match(t, "NeoSansIntel", 8, "AVX2") {
   330  			t.FontSize = 9
   331  			text[i] = t
   332  		}
   333  		if t.Font == "NeoSansIntel-Medium" {
   334  			t.Font = "NeoSansIntelMedium"
   335  			text[i] = t
   336  		}
   337  		if t.Font == "NeoSansIntel-Italic" {
   338  			t.Font = "NeoSansIntel,Italic"
   339  			text[i] = t
   340  		}
   341  	}
   343  	if debugging {
   344  		for _, t := range text {
   345  			fmt.Println(t)
   346  		}
   347  	}
   349  	if pageNum == 1 {
   350  		var buf bytes.Buffer
   351  		for _, t := range text {
   352  			buf.WriteString(t.S + "\n")
   353  		}
   354  		all := buf.String()
   355  		m := regexp.MustCompile(`Order Number: ([\w-\-]+)`).FindStringSubmatch(all)
   356  		num := "???"
   357  		if m != nil {
   358  			num = m[1]
   359  		}
   360  		date := dateRE.FindString(all)
   361  		if date == "" {
   362  			date = "???"
   363  		}
   365  		fmt.Printf("# x86 instruction set description version %s, %s\n",
   366  			specFormatVersion, time.Now().Format("2006-01-02"))
   367  		fmt.Printf("# Based on Intel Instruction Set Reference #%s, %s.\n", num, date)
   368  		fmt.Printf("# https://golang.org/x/arch/x86/x86spec\n")
   369  	}
   371  	// Remove text we should ignore.
   372  	out := text[:0]
   373  	for _, t := range text {
   374  		if shouldIgnore(t) {
   375  			continue
   376  		}
   377  		out = append(out, t)
   378  	}
   379  	text = out
   381  	// Page header must say instruction set reference.
   382  	if len(text) == 0 {
   383  		return parsed
   384  	}
   385  	if (!match(text[0], "NeoSansIntel", 9, "INSTRUCTION") || !match(text[0], "NeoSansIntel", 9, "REFERENCE")) &&
   386  		!match(text[0], "NeoSansIntel", 9, "EXTENSIONS") {
   387  		return parsed
   388  	}
   389  	text = text[1:]
   391  	enctable := findEncodingTable(text)
   392  	if enctable != nil {
   393  		parsed.enctables = append(parsed.enctables, enctable)
   394  	}
   396  	parsed.compat = findCompat(text)
   398  	// Narrow scope for finding mnemonic table.
   399  	// Must be last, since it trims text.
   400  	// Next line is headline. Can wrap to multiple lines.
   401  	if len(text) == 0 || !match(text[0], "NeoSansIntelMedium", 12, "") || !isInstHeadline(text[0].S) {
   402  		if debugging {
   403  			fmt.Fprintf(os.Stderr, "non-inst-headline: %v\n", text[0])
   404  		}
   405  	} else {
   406  		parsed.name = text[0].S
   407  		text = text[1:]
   408  		for len(text) > 0 && match(text[0], "NeoSansIntelMedium", 12, "") {
   409  			parsed.name += " " + text[0].S
   410  			text = text[1:]
   411  		}
   412  		parsed.name = fixDash.Replace(parsed.name)
   413  	}
   415  	// Table follows; heading is NeoSansIntelMedium and rows are NeoSansIntel.
   416  	i := 0
   417  	for i < len(text) && match(text[i], "NeoSansIntelMedium", 9, "") {
   418  		i++
   419  	}
   420  	for i < len(text) && match(text[i], "NeoSansIntel", 9, "") && text[i].S != "NOTES:" {
   421  		i++
   422  	}
   424  	mtable := findMnemonicTable(text[:i])
   425  	if mtable != nil {
   426  		parsed.mtables = append(parsed.mtables, mtable)
   427  	}
   429  	return parsed
   430  }
   432  func match(t pdf.Text, font string, size float64, substr string) bool {
   433  	return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr)
   434  }
   436  func shouldIgnore(t pdf.Text) bool {
   437  	// Ignore footnote stars, which are in Arial.
   438  	// Also, the page describing MOVS has a tiny 2pt Arial backslash.
   439  	if (t.S == "*" || t.S == "\\") && strings.HasPrefix(t.Font, "Arial") {
   440  		return true
   441  	}
   443  	// Ignore superscript numbers, superscript ST(0), and superscript x.
   444  	if len(t.S) == 1 && '1' <= t.S[0] && t.S[0] <= '9' || t.S == "ST(0)" || t.S == "x" {
   445  		if match(t, "NeoSansIntel", 7.2, "") || match(t, "NeoSansIntel", 5.6, "") || match(t, "NeoSansIntelMedium", 8, "") || match(t, "NeoSansIntelMedium", 9.6, "") {
   446  			return true
   447  		}
   448  	}
   450  	return false
   451  }
   453  func isInstHeadline(s string) bool {
   454  	return strings.Contains(s, "—") ||
   455  		strings.Contains(s, " - ") ||
   456  		strings.Contains(s, "PTEST- Logical Compare")
   457  }
// findWords joins the text fragments in chars into words (really, phrases)
// and returns one pdf.Text per phrase.
//
// chars is sorted in place, and Y coordinates that differ by less than nudge
// from the preceding baseline are overwritten with that baseline so that
// fragments on one visual line compare equal. Within a line, neighbouring
// fragments are joined when sameFont accepts their fonts, their sizes are
// equal and the horizontal gap is small enough.
func findWords(chars []pdf.Text) (words []pdf.Text) {
	// Sort by Y coordinate and normalize.
	const nudge = 1
	sort.Sort(pdf.TextVertical(chars))
	old := -100000.0
	for i, c := range chars {
		// Snap a Y within nudge of the current baseline onto it;
		// anything else becomes the new baseline.
		if c.Y != old && math.Abs(old-c.Y) < nudge {
			chars[i].Y = old
		} else {
			old = c.Y
		}
	}

	// Sort by Y coordinate, breaking ties with X.
	// This will bring letters in a single word together.
	sort.Sort(pdf.TextVertical(chars))

	// Loop over chars.
	for i := 0; i < len(chars); {
		// Find all chars on line.
		j := i + 1
		for j < len(chars) && chars[j].Y == chars[i].Y {
			j++
		}
		var end float64
		// Split line into words (really, phrases).
		for k := i; k < j; {
			ck := &chars[k]
			s := ck.S
			end = ck.X + ck.W
			// Gap limits scale with the font size of the phrase's
			// first fragment.
			charSpace := ck.FontSize / 6
			wordSpace := ck.FontSize * 2 / 3
			l := k + 1
			for l < j {
				// Grow word.
				cl := &chars[l]
				if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+charSpace {
					s += cl.S
					end = cl.X + cl.W
					l++
					continue
				}
				// Add space to phrase before next word.
				if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+wordSpace {
					s += " " + cl.S
					end = cl.X + cl.W
					l++
					continue
				}
				break
			}
			// Record the phrase under the font name with any Italic
			// suffix removed.
			f := ck.Font
			f = strings.TrimSuffix(f, ",Italic")
			f = strings.TrimSuffix(f, "-Italic")
			// NOTE(review): the fifth value is end (the right edge of the
			// phrase), not end minus ck.X — confirm this is what the
			// corresponding pdf.Text field is meant to hold.
			words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end, s})
			k = l
		}
		i = j
	}

	return words
}
   522  func sameFont(f1, f2 string) bool {
   523  	f1 = strings.TrimSuffix(f1, ",Italic")
   524  	f1 = strings.TrimSuffix(f1, "-Italic")
   525  	f2 = strings.TrimSuffix(f1, ",Italic")
   526  	f2 = strings.TrimSuffix(f1, "-Italic")
   527  	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
   528  }
   530  func findMnemonicTable(text []pdf.Text) [][]string {
   531  	sort.Sort(pdf.TextHorizontal(text))
   533  	const nudge = 1
   535  	old := -100000.0
   536  	var col []float64
   537  	for i, t := range text {
   538  		if t.Font != "NeoSansIntelMedium" { // only headings count
   539  			continue
   540  		}
   541  		if t.X != old && math.Abs(old-t.X) < nudge {
   542  			text[i].X = old
   543  		} else if t.X != old {
   544  			old = t.X
   545  			col = append(col, old)
   546  		}
   547  	}
   548  	sort.Sort(pdf.TextVertical(text))
   550  	if len(col) == 0 {
   551  		return nil
   552  	}
   554  	y := -100000.0
   555  	var table [][]string
   556  	var line []string
   557  	bold := -1
   558  	for _, t := range text {
   559  		if t.Y != y {
   560  			table = append(table, make([]string, len(col)))
   561  			line = table[len(table)-1]
   562  			y = t.Y
   563  			if t.Font == "NeoSansIntelMedium" {
   564  				bold = len(table) - 1
   565  			}
   566  		}
   567  		i := 0
   568  		for i+1 < len(col) && col[i+1] <= t.X+nudge {
   569  			i++
   570  		}
   571  		if line[i] != "" {
   572  			line[i] += " "
   573  		}
   574  		line[i] += t.S
   575  	}
   577  	var mtable [][]string
   578  	for i, t := range table {
   579  		if 0 < i && i <= bold || bold < i && halfMissing(t) {
   580  			// merge with earlier line
   581  			last := mtable[len(mtable)-1]
   582  			for j, s := range t {
   583  				if s != "" {
   584  					last[j] += "\n" + s
   585  				}
   586  			}
   587  		} else {
   588  			mtable = append(mtable, t)
   589  		}
   590  	}
   592  	if bold >= 0 {
   593  		heading := mtable[0]
   594  		for i, x := range heading {
   595  			heading[i] = fixHeading.Replace(x)
   596  		}
   597  	}
   599  	return mtable
   600  }
// fixHeading rewrites the many line-broken and misspelled variants of the
// mnemonic table column titles to the canonical names that processListing
// switches on. The "\n" in the patterns is the separator findMnemonicTable
// inserts when it folds several heading lines into one cell.
var fixHeading = strings.NewReplacer(
	"64/32-\nbit\nMode", "64/32-Bit Mode",
	"64/32-\nbit Mode", "64/32-Bit Mode",
	"64/32-bit\nMode", "64/32-Bit Mode",
	"64/3\n2-bit\nMode", "64/32-Bit Mode",
	"64/32 bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32\n-bit\nMode", "64/32-Bit Mode",
	"64/32\nbit Mode\nSupport", "64/32-Bit Mode",
	"64-Bit\nMode", "64-Bit Mode",
	"64-bit\nMode", "64-Bit Mode",

	"Op/ En", "Op/En",
	"Op/\nEn", "Op/En",
	"Op/\nEN", "Op/En",
	"Op /\nEn", "Op/En",
	"Opcode***", "Opcode",
	"Opcode**", "Opcode",
	"Opcode*", "Opcode",
	"/\nInstruction", "/Instruction",

	"CPUID Fea-\nture Flag", "CPUID Feature Flag",
	"CPUID\nFeature\nFlag", "CPUID Feature Flag",
	"CPUID\nFeature Flag", "CPUID Feature Flag",
	"CPUIDFeature\nFlag", "CPUID Feature Flag",

	"Compat/\nLeg Mode*", "Compat/Leg Mode",
	"Compat/\nLeg Mode", "Compat/Leg Mode",
	"Compat/ *\nLeg Mode", "Compat/Leg Mode",
)
   633  func halfMissing(x []string) bool {
   634  	n := 0
   635  	for _, s := range x {
   636  		if s == "" {
   637  			n++
   638  		}
   639  	}
   640  	return n >= len(x)/2
   641  }
// findEncodingTable extracts the table that follows an
// "Instruction Operand Encoding" title on the page and returns it as rows of
// strings, or nil if no column headings were found. text is sorted in place.
//
// Column positions are the horizontal centers of the "Op/En" and "Operand"
// heading texts found after the title. The table ends at the next
// "Description" title or at the end of text. Rows whose first cell is empty
// are treated as continuations and folded into the row before them, and
// rows whose last cell starts with "Vol. 2" are dropped as page footers.
func findEncodingTable(text []pdf.Text) [][]string {
	// Look for operand encoding table.
	sort.Sort(pdf.TextVertical(text))
	var col []float64
	sawTitle := false

	// center returns t.X + t.W/2, used as the horizontal position of t.
	center := func(t pdf.Text) float64 {
		return t.X + t.W/2
	}

	start := 0
	end := len(text)
	for i, t := range text {
		if match(t, "NeoSansIntelMedium", 10, "Instruction Operand Encoding") {
			sawTitle = true
			start = i + 1
			continue
		}
		if !sawTitle {
			continue
		}
		// Each column heading contributes one column position.
		if match(t, "NeoSansIntel", 9, "Op/En") || match(t, "NeoSansIntel", 9, "Operand") {
			if debugging {
				fmt.Printf("column %d at %.2f: %v\n", len(col), center(t), t)
			}
			col = append(col, center(t))
		}
		if match(t, "NeoSansIntelMedium", 10, "Description") {
			end = i
			break
		}
	}
	text = text[start:end]

	if len(col) == 0 {
		return nil
	}

	const nudge = 20

	// Build one raw row per distinct Y coordinate, placing each text in the
	// right-most column whose center is not beyond the text's center plus
	// nudge.
	y := -100000.0
	var table [][]string
	var line []string
	for _, t := range text {
		if t.Y != y {
			table = append(table, make([]string, len(col)))
			line = table[len(table)-1]
			y = t.Y
		}
		i := 0
		x := center(t)
		for i+1 < len(col) && col[i+1] <= x+nudge {
			i++
		}
		if debugging {
			fmt.Printf("text at %.2f: %v => %d\n", x, t, i)
		}
		if line[i] != "" {
			line[i] += " "
		}
		line[i] += t.S
	}

	// Filter in place: out reuses table's backing array.
	out := table[:0]
	for _, line := range table {
		if strings.HasPrefix(line[len(line)-1], "Vol. 2") { // page footer
			continue
		}
		// A row with an empty first cell continues the previous row.
		if line[0] == "" && len(out) > 0 {
			last := out[len(out)-1]
			for i, col := range line {
				if col != "" {
					last[i] += " " + col
				}
			}
			continue
		}
		out = append(out, line)
	}
	table = out

	return table
}
   727  func findCompat(text []pdf.Text) string {
   728  	sort.Sort(pdf.TextVertical(text))
   730  	inCompat := false
   731  	out := ""
   732  	for _, t := range text {
   733  		if match(t, "NeoSansIntelMedium", 10, "") {
   734  			inCompat = strings.Contains(t.S, "Architecture Compatibility")
   735  			if inCompat {
   736  				out += t.S + "\n"
   737  			}
   738  		}
   739  		if inCompat && match(t, "Verdana", 9, "") || strings.Contains(t.S, "were introduced") {
   740  			out += t.S + "\n"
   741  		}
   742  	}
   743  	return out
   744  }
// processListing converts the tables collected in p into instruction values
// and appends them to *insts, leaving out any whose syntax is listed in
// instBlacklist.
//
// Every row of every mnemonic table, other than the heading row and exact
// repeats of it, becomes one instruction; the column titles of the heading
// row decide which instruction field each cell fills. If a table has an
// unrecognized column title or a malformed cell, processListing reports the
// problem and dumps all of p's mnemonic tables to standard error, then
// returns; instructions appended before the problem was found are kept.
func processListing(p *listing, insts *[]*instruction) {
	if debugging {
		for _, table := range p.mtables {
			fmt.Printf("table:\n")
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("enctable:\n")
		for _, table := range p.enctables {
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("compat:\n%s", p.compat)
	}

	if *flagCompat && p.compat != "" {
		fmt.Printf("# p.%d: %s\n#\t%s\n", p.pageNum, p.name, strings.Replace(p.compat, "\n", "\n#\t", -1))
	}

	// encs maps the first cell of each encoding-table row to the rest of
	// that row, with trailing "NA", empty and " source" cells removed.
	// table[1:] skips the first row of each table (presumably the column
	// titles).
	encs := make(map[string][]string)
	for _, table := range p.enctables {
		for _, row := range table[1:] {
			for len(row) > 1 && (row[len(row)-1] == "NA" || row[len(row)-1] == "" || row[len(row)-1] == " source") {
				row = row[:len(row)-1]
			}
			encs[row[0]] = row[1:]
		}
	}

	// wrong describes the problem reported at BadTable.
	var wrong string
	for _, table := range p.mtables {
		heading := table[0]
		for _, row := range table[1:] {
			// Skip rows that merely repeat the heading.
			if row[0] == heading[0] && reflect.DeepEqual(row, heading) {
				continue
			}
			// Split a "V/N.E." cell that swallowed the next column's value.
			if len(row) >= 5 && row[1] == "CMOVG r64, r/m64" && row[3] == "V/N.E." && row[4] == "NA" {
				row[3] = "V"
				row[4] = "N.E."
			}
			inst := new(instruction)
			inst.page = p.pageNum
			inst.compat = strings.Join(strings.Fields(p.compat), " ")
			for i, hdr := range heading {
				// x is the cell with line breaks flattened to spaces.
				x := row[i]
				x = strings.Replace(x, "\n", " ", -1)
				switch strings.TrimSpace(hdr) {
				default:
					wrong = "unexpected header: " + strconv.Quote(hdr)
					goto BadTable
				case "Opcode/Instruction":
					// This column needs the original line breaks: the
					// first line is the opcode and the rest is the syntax.
					x = row[i]
					if strings.HasPrefix(x, "\nVEX") {
						x = x[1:]
						row[i] = x
					}
					if strings.Contains(x, "\n/r ") {
						x = strings.Replace(x, "\n/r ", " /r ", -1)
						row[i] = x
					}
					if strings.Contains(x, ",\nimm") {
						x = strings.Replace(x, ",\nimm", ", imm", -1)
						row[i] = x
					}
					if strings.Count(x, "\n") < 1 {
						wrong = "bad Opcode/Instruction pairing: " + strconv.Quote(x)
						goto BadTable
					}
					i := strings.Index(x, "\n")
					inst.opcode = x[:i]
					inst.syntax = strings.Replace(x[i+1:], "\n", " ", -1)

				case "Opcode":
					inst.opcode = x

				case "Instruction":
					inst.syntax = x

				case "Op/En":
					inst.args = encs[x]
					// Fall back to encoding "A" when it is the only one.
					if inst.args == nil && len(encs) == 1 && encs["A"] != nil {
						inst.args = encs["A"]
					}
					// In the December 2015 manual, PREFETCHW says
					// encoding A but the table gives encoding M.
					if inst.args == nil && inst.syntax == "PREFETCHW m8" && x == "A" && len(encs) == 1 && encs["M"] != nil {
						inst.args = encs["M"]
					}

				case "64-Bit Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for 64-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x

				case "Compat/Leg Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for Compat/Leg Mode column: " + x
						goto BadTable
					}
					inst.valid32 = x

				case "64/32-Bit Mode":
					// The cell holds "<64-bit mode>/<32-bit mode>".
					i := strings.Index(x, "/")
					if i < 0 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					x1, ok1 := parseMode(x[:i])
					x2, ok2 := parseMode(x[i+1:])
					if !ok1 || !ok2 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x1
					inst.valid32 = x2

				case "CPUID Feature Flag":
					inst.cpuid = x

				case "Description":
					if inst.desc != "" {
						inst.desc += " "
					}
					inst.desc += x
				}
			}

			// Fixup various typos or bugs in opcode descriptions.
			if inst.opcode == "VEX.128.66.0F.W0 6E /" {
				inst.opcode += "r"
			}
			// fix is rebound below to edit the syntax and then the cpuid
			// field. The replacements are applied in order, and later ones
			// see the results of earlier ones.
			fix := func(old, new string) {
				inst.opcode = strings.Replace(inst.opcode, old, new, -1)
			}
			fix(" imm8", " ib")
			fix("REX.w", "REX.W")
			fix("REX.W+", "REX.W +")
			fix(" 0f ", " 0F ")
			fix(". 0F38", ".0F38")
			fix("0F .WIG", "0F.WIG")
			fix("0F38 .WIG", "0F38.WIG")
			fix("NDS .LZ", "NDS.LZ")
			fix("58+ r", "58+r")
			fix("B0+ ", "B0+")
			fix("B8+ ", "B8+")
			fix("40+ ", "40+")
			fix("*", "")
			fix(",", " ")
			fix("/", " /")
			fix("REX.W +", "REX.W")
			fix("REX +", "REX")
			fix("REX 0F BE", "REX.W 0F BE")
			fix("REX 0F B2", "REX.W 0F B2")
			fix("REX 0F B4", "REX.W 0F B4")
			fix("REX 0F B5", "REX.W 0F B5")
			fix("0F38.0", "0F38.W0")
			fix(".660F.", ".66.0F.")
			fix("VEX128", "VEX.128")
			fix("0F3A.W0.1D", "0F3A.W0 1D")

			// Collapse runs of whitespace left behind by the fixes.
			inst.opcode = strings.Join(strings.Fields(inst.opcode), " ")

			fix = func(old, new string) {
				inst.syntax = strings.Replace(inst.syntax, old, new, -1)
			}
			fix("xmm1 xmm2", "xmm1, xmm2")
			fix("r16/m16", "r/m16")
			fix("r32/m161", "r32/m16") // really r32/m16¹ (footnote)
			fix("r32/m32", "r/m32")
			fix("r64/m64", "r/m64")
			fix("\u2013", "-")
			fix("mm3 /m", "mm3/m")
			fix("mm3/.m", "mm3/m")
			// Round-trip through splitSyntax/joinSyntax to normalize the
			// spacing around the argument list.
			inst.syntax = joinSyntax(splitSyntax(inst.syntax))

			fix = func(old, new string) {
				inst.cpuid = strings.Replace(inst.cpuid, old, new, -1)
			}
			fix("PCLMUL- QDQ", "PCLMULQDQ")
			fix("PCL- MULQDQ", "PCLMULQDQ")
			fix("Both PCLMULQDQ and AVX flags", "PCLMULQDQ+AVX")

			if !instBlacklist[inst.syntax] {
				*insts = append(*insts, inst)
			}
		}
	}
	return

BadTable:
	fmt.Fprintf(os.Stderr, "p.%d: reading %v: %v\n", p.pageNum, p.name, wrong)
	for _, table := range p.mtables {
		for _, t := range table {
			fmt.Fprintf(os.Stderr, "\t%q\n", t)
		}
	}
	fmt.Fprintf(os.Stderr, "\n")
}
   951  func parseMode(s string) (string, bool) {
   952  	switch strings.TrimSpace(s) {
   953  	case "Invalid", "Invalid*", "Inv.", "I", "i":
   954  		return "I", true
   955  	case "Valid", "Valid*", "V":
   956  		return "V", true
   957  	case "N.E.", "NE", "N. E.":
   958  		return "N.E.", true
   959  	case "N.P.", "N. P.":
   960  		return "N.P.", true
   961  	case "N.S.", "N. S.":
   962  		return "N.S.", true
   963  	case "N.I.", "N. I.":
   964  		return "N.I.", true
   965  	}
   966  	return s, false
   967  }
   969  func splitSyntax(syntax string) (op string, args []string) {
   970  	i := strings.Index(syntax, " ")
   971  	if i < 0 {
   972  		return syntax, nil
   973  	}
   974  	op, syntax = syntax[:i], syntax[i+1:]
   975  	args = strings.Split(syntax, ",")
   976  	for i, arg := range args {
   977  		arg = strings.TrimSpace(arg)
   978  		arg = strings.TrimRight(arg, "*")
   979  		args[i] = arg
   980  	}
   981  	return
   982  }
   984  func joinSyntax(op string, args []string) string {
   985  	if len(args) == 0 {
   986  		return op
   987  	}
   988  	return op + " " + strings.Join(args, ", ")
   989  }

View as plain text