...

Source file src/golang.org/x/arch/ppc64/ppc64spec/spec.go

Documentation: golang.org/x/arch/ppc64/ppc64spec

     1  // Copyright 2014 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Ppc64spec reads the “Power ISA V2.07” Manual
     6  // to collect instruction encoding details and writes those details to standard output
     7  // in CSV format.
     8  //
     9  // Usage:
    10  //
    11  //	ppc64spec PowerISA_V2.07_PUBLIC.pdf >ppc64.csv
    12  //
    13  // Each CSV line contains four fields:
    14  //
    15  //	instruction
    16  //		The instruction heading, such as "Count Leading Zeros Word X-form".
    17  //	mnemonic
    18  //		The instruction mnemonics, separated by | symbols.
    19  //	encoding
    20  //		The instruction encoding, a sequence of name@startbit| describing each bit field in turn.
    21  //	tags
    22  //		For now, empty.
    23  //
    24  // For more on the exact meaning of these fields, see the Power manual.
    25  package main
    26  
    27  import (
    28  	"bufio"
    29  	"fmt"
    30  	"log"
    31  	"math"
    32  	"os"
    33  	"regexp"
    34  	"sort"
    35  	"strconv"
    36  	"strings"
    37  
    38  	"rsc.io/pdf"
    39  )
    40  
    41  type Inst struct {
    42  	Name string
    43  	Text string
    44  	Enc  string
    45  }
    46  
// debugPage, when greater than zero, restricts processing to that one page
// number and turns on extra diagnostic printing. With the value 0 every page
// is processed and missing instructions are reported.
const debugPage = 0

// stdout is the buffered writer that main uses to emit the CSV on standard
// output.
var stdout *bufio.Writer
    50  
    51  func main() {
    52  	log.SetFlags(0)
    53  	log.SetPrefix("ppc64spec: ")
    54  
    55  	if len(os.Args) != 2 {
    56  		fmt.Fprintf(os.Stderr, "usage: ppc64spec file.pdf\n")
    57  		os.Exit(2)
    58  	}
    59  
    60  	f, err := pdf.Open(os.Args[1])
    61  	if err != nil {
    62  		log.Fatal(err)
    63  	}
    64  
    65  	// Find instruction set reference in outline, to build instruction list.
    66  	instList := instHeadings(f.Outline())
    67  	if len(instList) < 200 {
    68  		log.Fatalf("only found %d instructions in table of contents", len(instList))
    69  	}
    70  
    71  	var all = []Inst{
    72  		// Split across multiple columns and pages!
    73  		{"Count Leading Zeros Word X-form", "cntlzw RA, RS (Rc=0)\ncntlzw. RA, RS (Rc=1)", "31@0|RS@6|RA@11|///@16|26@21|Rc@31|"},
    74  	}
    75  
    76  	for j, headline := range instList {
    77  		for _, inst := range all {
    78  			if headline == inst.Name {
    79  				instList[j] = ""
    80  				break
    81  			}
    82  		}
    83  	}
    84  
    85  	// Scan document looking for instructions.
    86  	// Must find exactly the ones in the outline.
    87  	n := f.NumPage()
    88  	for pageNum := 1; pageNum <= n; pageNum++ {
    89  		if debugPage > 0 && pageNum != debugPage {
    90  			continue
    91  		}
    92  		p := f.Page(pageNum)
    93  		table := parsePage(pageNum, p)
    94  		if len(table) == 0 {
    95  			continue
    96  		}
    97  	InstLoop:
    98  		for _, inst := range table {
    99  			for j, headline := range instList {
   100  				if inst.Name == headline {
   101  					instList[j] = ""
   102  					continue InstLoop
   103  				}
   104  			}
   105  			fmt.Fprintf(os.Stderr, "page %d: unexpected instruction %q\n", pageNum, inst.Name)
   106  		}
   107  		all = append(all, table...)
   108  	}
   109  
   110  	if debugPage == 0 {
   111  		for _, headline := range instList {
   112  			if headline != "" {
   113  				switch headline {
   114  				default:
   115  					fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
   116  				case "CHKA": // ThumbEE
   117  				case "CPS": // system instruction
   118  				case "CPY": // synonym for MOV
   119  				case "ENTERX": // ThumbEE
   120  				case "F* (former VFP instruction mnemonics)": // synonyms
   121  				case "HB, HBL, HBLP, HBP": // ThumbEE
   122  				case "LEAVEX": // ThumbEE
   123  				case "MOV (shifted register)": // pseudo instruction for ASR, LSL, LSR, ROR, and RRX
   124  				case "NEG": // synonym for RSB
   125  				case "RFE": // system instruction
   126  				case "SMC (previously SMI)": // system instruction
   127  				case "SRS": // system instruction
   128  				case "SUBS PC, LR and related instructions": // system instruction
   129  				case "VAND (immediate)": // pseudo instruction
   130  				case "VCLE (register)": // pseudo instruction
   131  				case "VCLT (register)": // pseudo instruction
   132  				case "VORN (immediate)": // pseudo instruction
   133  				}
   134  			}
   135  		}
   136  	}
   137  
   138  	stdout = bufio.NewWriter(os.Stdout)
   139  	for _, inst := range all {
   140  		fmt.Fprintf(stdout, "%q,%q,%q,%q\n", inst.Name, strings.Replace(inst.Text, "\n", "|", -1), inst.Enc, "")
   141  	}
   142  	stdout.Flush()
   143  
   144  }
   145  
   146  func instHeadings(outline pdf.Outline) []string {
   147  	return appendInstHeadings(outline, nil)
   148  }
   149  
   150  var instRE = regexp.MustCompile(` ([A-Z0-9]+-form|Byte|Word|Doubleword|Halfword)($| \[)`)
   151  var sectionRE = regexp.MustCompile(`^[0-9A-Z]+\.[0-9]`)
   152  
   153  func appendInstHeadings(outline pdf.Outline, list []string) []string {
   154  	if strings.Contains(outline.Title, "Variable Length Encoding (VLE) Encoding") {
   155  		for _, child := range outline.Child {
   156  			vle = appendInstHeadings(child, vle)
   157  		}
   158  		return list
   159  	}
   160  	if instRE.MatchString(outline.Title) && !sectionRE.MatchString(outline.Title) {
   161  		list = append(list, outline.Title)
   162  	}
   163  	if outline.Title == "Transaction Abort Word Conditional" {
   164  		list = append(list, outline.Title+" X-form")
   165  	}
   166  	for _, child := range outline.Child {
   167  		list = appendInstHeadings(child, list)
   168  	}
   169  	return list
   170  }
   171  
// inch is 72.0, presumably the number of PDF points per inch (TODO confirm).
// It is not referenced by the code visible in this file; the margins in
// readBitBox spell out 72 directly.
const inch = 72.0
   173  
   174  func parsePage(num int, p pdf.Page) []Inst {
   175  	content := p.Content()
   176  
   177  	var text []pdf.Text
   178  	for _, t := range content.Text {
   179  		text = append(text, t)
   180  	}
   181  
   182  	text = findWords(text)
   183  
   184  	if debugPage > 0 {
   185  		for _, t := range text {
   186  			fmt.Println(t)
   187  		}
   188  		for _, r := range content.Rect {
   189  			fmt.Println(r)
   190  		}
   191  	}
   192  
   193  	// Look for instruction encodings.
   194  	// Some begin with a Helvetica-BoldOblique size 11 headline like "AND   X-Form",
   195  	// is followed by Helvetica 9 mnemonic, and then a bit box with
   196  	// Helvetica 9 fields and Helvetica 7 bit offsets.
   197  	// Others use Arial,BoldItalic 11 for the headline,
   198  	// Arial 8 for the mnemonic, and Arial 4.2 for the bit offsets.
   199  
   200  	var insts []Inst
   201  	for {
   202  		// Heading
   203  		for len(text) > 0 && !match(text[0], "Helvetica-BoldOblique", 11, "") && !match(text[0], "Arial,BoldItalic", 11, "") && !match(text[0], "Arial,BoldItalic", 10, "") {
   204  			text = text[1:]
   205  		}
   206  		if len(text) == 0 {
   207  			break
   208  		}
   209  		heading := text[0].S
   210  		text = text[1:]
   211  		for len(text) > 0 && (match(text[0], "Helvetica-BoldOblique", 11, "") || match(text[0], "Arial,BoldItalic", 11, "") || match(text[0], "Arial,BoldItalic", 10, "")) {
   212  			heading += " " + text[0].S
   213  			text = text[1:]
   214  		}
   215  		heading = strings.Replace(heading, "]", "] ", -1)
   216  		heading = strings.Replace(heading, "  ", " ", -1)
   217  		heading = strings.Replace(heading, "rEVX-form", "r EVX-form", -1)
   218  		heading = strings.Replace(heading, "eX-form", "e X-form", -1)
   219  		heading = strings.Replace(heading, "mSD4-form", "m SD4-form", -1)
   220  		heading = strings.Replace(heading, "eSCI8-form", "e SCI8-form", -1)
   221  		heading = strings.TrimSpace(heading)
   222  		if isVLE(heading) {
   223  			continue
   224  		}
   225  
   226  		// Mnemonic
   227  		if len(text) == 0 || (!match(text[0], "Helvetica", 9, "") && !match(text[0], "Helvetica-BoldOblique", 9, "") && !match(text[0], "Arial", 9, "") && !match(text[0], "Arial", 10, "")) {
   228  			continue
   229  		}
   230  		mnemonic := ""
   231  		y := text[0].Y
   232  		x0 := text[0].X
   233  		for len(text) > 0 && (match(text[0], "Helvetica", 9, "") || match(text[0], "Helvetica-BoldOblique", 9, "") || match(text[0], "Arial", 9, "") || match(text[0], "Courier", 8, "") || match(text[0], "LucidaConsole", 7.17, "") || text[0].Y == y) {
   234  			if text[0].Y != y {
   235  				if math.Abs(text[0].X-x0) > 4 {
   236  					break
   237  				}
   238  				mnemonic += "\n"
   239  				y = text[0].Y
   240  			} else if mnemonic != "" {
   241  				mnemonic += " "
   242  			}
   243  			mnemonic += text[0].S
   244  			text = text[1:]
   245  		}
   246  
   247  		// Encoding
   248  		bits, i := readBitBox(heading, content, text, num)
   249  		if i == 0 {
   250  			continue
   251  		}
   252  
   253  		insts = append(insts, Inst{heading, mnemonic, bits})
   254  	}
   255  	return insts
   256  }
   257  
   258  var vle = []string{
   259  	"System Call C-form,ESC-form",
   260  }
   261  
   262  func isVLE(s string) bool {
   263  	for _, v := range vle {
   264  		if s == v {
   265  			return true
   266  		}
   267  	}
   268  	return false
   269  }
   270  
   271  func readBitBox(headline string, content pdf.Content, text []pdf.Text, pageNum int) (string, int) {
   272  	// fields
   273  	i := 0
   274  	if len(text) == 0 || (!match(text[i], "Helvetica", 9, "") && !match(text[i], "Helvetica", 7.26, "") && !match(text[i], "Arial", 9, "") && !match(text[i], "Arial", 7.98, "") && !match(text[i], "Arial", 7.2, "")) {
   275  		fmt.Fprintf(os.Stderr, "page %d: no bit fields for %q\n", pageNum, headline)
   276  		if len(text) > 0 {
   277  			fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[0])
   278  		}
   279  		return "", 0
   280  	}
   281  	sz := text[i].FontSize
   282  	y2 := text[i].Y
   283  	x2 := 0.0
   284  	for i < len(text) && text[i].Y == y2 {
   285  		if x2 < text[i].X+text[i].W {
   286  			x2 = text[i].X + text[i].W
   287  		}
   288  		i++
   289  	}
   290  	y2 += sz / 2
   291  
   292  	// bit numbers
   293  	if i >= len(text) || text[i].S != "0" {
   294  		if headline == "Transaction Abort Doubleword Conditional X-form" {
   295  			// Split across the next page.
   296  			return "31@0|TO@6|RA@11|RB@16|814@21|1@31|", i
   297  		}
   298  		if headline == "Add Scaled Immediate SCI8-form" {
   299  			// Very strange fonts.
   300  			return "06@0|RT@6|RA@11|8@16|Rc@20|F@21|SCL@22|UI8@24|", i
   301  		}
   302  		fmt.Fprintf(os.Stderr, "page %d: no bit numbers for %s\n", pageNum, headline)
   303  		if i < len(text) {
   304  			fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[i])
   305  		}
   306  		return "", 0
   307  	}
   308  	sz = text[i].FontSize
   309  	y1 := text[i].Y
   310  	x1 := text[i].X
   311  	for i < len(text) && text[i].Y == y1 {
   312  		if x2 < text[i].X+text[i].W {
   313  			x2 = text[i].X + text[i].W
   314  		}
   315  		i++
   316  	}
   317  
   318  	if debugPage > 0 {
   319  		fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], text[i])
   320  	}
   321  
   322  	// Find lines (thin rectangles) separating bit fields.
   323  	var bottom, top pdf.Rect
   324  	const (
   325  		yMargin = 0.25 * 72
   326  		xMargin = 1 * 72
   327  	)
   328  	for _, r := range content.Rect {
   329  		// Only consider lines in the same column.
   330  		if (x1 < 306) != (r.Max.X < 306) {
   331  			continue
   332  		}
   333  		if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
   334  			if y1-yMargin < r.Min.Y && r.Min.Y < y1 {
   335  				bottom = r
   336  			}
   337  			if y2 < r.Min.Y && r.Min.Y < y2+8 {
   338  				top = r
   339  			}
   340  		}
   341  	}
   342  
   343  	if bottom.Min.X == 0 {
   344  		// maybe bit numbers are outside box; see doze, nap, sleep, rvwinkle.
   345  		for _, r := range content.Rect {
   346  			// Only consider lines in the same column.
   347  			if (x1 < 306) != (r.Max.X < 306) {
   348  				continue
   349  			}
   350  			if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
   351  				if y1+sz/2 < r.Min.Y && r.Min.Y < y2 {
   352  					bottom = r
   353  				}
   354  			}
   355  		}
   356  	}
   357  
   358  	if debugPage > 0 {
   359  		fmt.Println("top", top, "bottom", bottom)
   360  	}
   361  
   362  	const ε = 0.1 * 72
   363  	var bars []pdf.Rect
   364  	for _, r := range content.Rect {
   365  		if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε && (bottom.Min.X < 306) == (r.Max.X < 306) {
   366  			bars = append(bars, r)
   367  		}
   368  	}
   369  	sort.Sort(RectHorizontal(bars))
   370  
   371  	out := ""
   372  	for i := 0; i < len(bars)-1; i++ {
   373  		var sub []pdf.Text
   374  		x1, x2 := bars[i].Min.X, bars[i+1].Min.X
   375  		for _, t := range content.Text {
   376  			tx := t.X + t.W/2
   377  			ty := t.Y + t.FontSize/4
   378  			if x1 < tx && tx < x2 && y1 < ty && ty < y2 {
   379  				sub = append(sub, t)
   380  			}
   381  		}
   382  		var str []string
   383  		for _, t := range findWords(sub) {
   384  			str = append(str, t.S)
   385  		}
   386  		s := strings.Join(str, "@")
   387  		out += s + "|"
   388  	}
   389  
   390  	if out == "" {
   391  		fmt.Fprintf(os.Stderr, "page %d: no bit encodings for %s\n", pageNum, headline)
   392  	}
   393  	return out, i
   394  }
   395  
   396  type RectHorizontal []pdf.Rect
   397  
   398  func (x RectHorizontal) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   399  func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X }
   400  func (x RectHorizontal) Len() int           { return len(x) }
   401  
   402  func checkNoEncodings(num int, text []pdf.Text) {
   403  	for _, t := range text {
   404  		if match(t, "Helvetica-Bold", 9, "Encoding") {
   405  			fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S)
   406  		}
   407  	}
   408  }
   409  
   410  func match(t pdf.Text, font string, size float64, substr string) bool {
   411  	return t.Font == font && (size == 0 || math.Abs(t.FontSize-size) < 0.1) && strings.Contains(t.S, substr)
   412  }
   413  
   414  func findWords(chars []pdf.Text) (words []pdf.Text) {
   415  	// Sort by Y coordinate and normalize.
   416  	const nudge = 1.5
   417  	sort.Sort(pdf.TextVertical(chars))
   418  	old := -100000.0
   419  	for i, c := range chars {
   420  		if c.Y != old && math.Abs(old-c.Y) < nudge {
   421  			chars[i].Y = old
   422  		} else {
   423  			old = c.Y
   424  		}
   425  	}
   426  
   427  	// Sort by Y coordinate, breaking ties with X.
   428  	// This will bring letters in a single word together.
   429  	sort.Sort(pdf.TextVertical(chars))
   430  
   431  	// Loop over chars.
   432  	for i := 0; i < len(chars); {
   433  		// Find all chars on line.
   434  		j := i + 1
   435  		for j < len(chars) && chars[j].Y == chars[i].Y {
   436  			j++
   437  		}
   438  		var end float64
   439  		// Split line into words (really, phrases).
   440  		for k := i; k < j; {
   441  			ck := &chars[k]
   442  			s := ck.S
   443  			end = ck.X + ck.W
   444  			charSpace := ck.FontSize / 6
   445  			wordSpace := ck.FontSize * 2 / 3
   446  			l := k + 1
   447  			for l < j {
   448  				// Grow word.
   449  				cl := &chars[l]
   450  				if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
   451  					s += cl.S
   452  					end = cl.X + cl.W
   453  					l++
   454  					continue
   455  				}
   456  				// Add space to phrase before next word.
   457  				if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
   458  					s += " " + cl.S
   459  					end = cl.X + cl.W
   460  					l++
   461  					continue
   462  				}
   463  				break
   464  			}
   465  			f := ck.Font
   466  			f = strings.TrimSuffix(f, ",Italic")
   467  			f = strings.TrimSuffix(f, "-Italic")
   468  			words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
   469  			k = l
   470  		}
   471  		i = j
   472  	}
   473  
   474  	// Split into two columns.
   475  	var col1, col2 []pdf.Text
   476  	for _, w := range words {
   477  		if w.X > 306 {
   478  			col2 = append(col2, w)
   479  		} else {
   480  			col1 = append(col1, w)
   481  		}
   482  	}
   483  	return append(col1, col2...)
   484  }
   485  
   486  func sameFont(f1, f2 string) bool {
   487  	f1 = strings.TrimSuffix(f1, ",Italic")
   488  	f1 = strings.TrimSuffix(f1, "-Italic")
   489  	f2 = strings.TrimSuffix(f1, ",Italic")
   490  	f2 = strings.TrimSuffix(f1, "-Italic")
   491  	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
   492  }
   493  
// jsFix is a strings.Replacer built with no replacement pairs, because the
// pairs listed below are commented out. It is not referenced by the code
// visible in this file.
var jsFix = strings.NewReplacer(
// `\u003c`, `<`,
// `\u003e`, `>`,
// `\u0026`, `&`,
// `\u0009`, `\t`,
)
   500  
// printTable is a stub: it ignores name and table and prints nothing.
// Its body only references strconv.Atoi, which is the sole use of the
// strconv import in the visible code.
func printTable(name string, table []Inst) {
	_ = strconv.Atoi
}
   504  

View as plain text