1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package main
26
27 import (
28 "bufio"
29 "fmt"
30 "log"
31 "math"
32 "os"
33 "regexp"
34 "sort"
35 "strconv"
36 "strings"
37
38 "rsc.io/pdf"
39 )
40
// Inst is one instruction record collected by this tool. Values are built
// positionally elsewhere in the file, so the field order is significant.
type Inst struct {
	Name string // section heading of the instruction, e.g. "Count Leading Zeros Word X-form"
	Text string // assembler syntax line(s); multiple forms are separated by "\n"
	Enc  string // encoding-box contents as "value@bit|" entries, e.g. "31@0|RS@6|"
}
46
// debugPage selects a single page to process for debugging. When it is
// greater than zero, main skips every other page, parsePage and readBitBox
// print the text and rectangles they see, and main's final
// "missing instruction" report is suppressed. Zero means normal operation.
const debugPage = 0
48
// stdout is the buffered writer for the program's result lines. main
// points it at os.Stdout just before printing and flushes it afterwards;
// it is nil until then.
var stdout *bufio.Writer
50
51 func main() {
52 log.SetFlags(0)
53 log.SetPrefix("ppc64spec: ")
54
55 if len(os.Args) != 2 {
56 fmt.Fprintf(os.Stderr, "usage: ppc64spec file.pdf\n")
57 os.Exit(2)
58 }
59
60 f, err := pdf.Open(os.Args[1])
61 if err != nil {
62 log.Fatal(err)
63 }
64
65
66 instList := instHeadings(f.Outline())
67 if len(instList) < 200 {
68 log.Fatalf("only found %d instructions in table of contents", len(instList))
69 }
70
71 var all = []Inst{
72
73 {"Count Leading Zeros Word X-form", "cntlzw RA, RS (Rc=0)\ncntlzw. RA, RS (Rc=1)", "31@0|RS@6|RA@11|///@16|26@21|Rc@31|"},
74 }
75
76 for j, headline := range instList {
77 for _, inst := range all {
78 if headline == inst.Name {
79 instList[j] = ""
80 break
81 }
82 }
83 }
84
85
86
87 n := f.NumPage()
88 for pageNum := 1; pageNum <= n; pageNum++ {
89 if debugPage > 0 && pageNum != debugPage {
90 continue
91 }
92 p := f.Page(pageNum)
93 table := parsePage(pageNum, p)
94 if len(table) == 0 {
95 continue
96 }
97 InstLoop:
98 for _, inst := range table {
99 for j, headline := range instList {
100 if inst.Name == headline {
101 instList[j] = ""
102 continue InstLoop
103 }
104 }
105 fmt.Fprintf(os.Stderr, "page %d: unexpected instruction %q\n", pageNum, inst.Name)
106 }
107 all = append(all, table...)
108 }
109
110 if debugPage == 0 {
111 for _, headline := range instList {
112 if headline != "" {
113 switch headline {
114 default:
115 fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
116 case "CHKA":
117 case "CPS":
118 case "CPY":
119 case "ENTERX":
120 case "F* (former VFP instruction mnemonics)":
121 case "HB, HBL, HBLP, HBP":
122 case "LEAVEX":
123 case "MOV (shifted register)":
124 case "NEG":
125 case "RFE":
126 case "SMC (previously SMI)":
127 case "SRS":
128 case "SUBS PC, LR and related instructions":
129 case "VAND (immediate)":
130 case "VCLE (register)":
131 case "VCLT (register)":
132 case "VORN (immediate)":
133 }
134 }
135 }
136 }
137
138 stdout = bufio.NewWriter(os.Stdout)
139 for _, inst := range all {
140 fmt.Fprintf(stdout, "%q,%q,%q,%q\n", inst.Name, strings.Replace(inst.Text, "\n", "|", -1), inst.Enc, "")
141 }
142 stdout.Flush()
143
144 }
145
146 func instHeadings(outline pdf.Outline) []string {
147 return appendInstHeadings(outline, nil)
148 }
149
var (
	// instRE matches an outline title that ends in (or is followed by " [")
	// an instruction form such as "X-form", or one of the operand-size words
	// Byte, Word, Doubleword or Halfword.
	instRE = regexp.MustCompile(` ([A-Z0-9]+-form|Byte|Word|Doubleword|Halfword)($| \[)`)

	// sectionRE matches a title that starts with a section number such as
	// "3.3" or "A.1"; appendInstHeadings uses it to reject such titles.
	sectionRE = regexp.MustCompile(`^[0-9A-Z]+\.[0-9]`)
)
152
153 func appendInstHeadings(outline pdf.Outline, list []string) []string {
154 if strings.Contains(outline.Title, "Variable Length Encoding (VLE) Encoding") {
155 for _, child := range outline.Child {
156 vle = appendInstHeadings(child, vle)
157 }
158 return list
159 }
160 if instRE.MatchString(outline.Title) && !sectionRE.MatchString(outline.Title) {
161 list = append(list, outline.Title)
162 }
163 if outline.Title == "Transaction Abort Word Conditional" {
164 list = append(list, outline.Title+" X-form")
165 }
166 for _, child := range outline.Child {
167 list = appendInstHeadings(child, list)
168 }
169 return list
170 }
171
// inch is 72.0, presumably the number of PDF coordinate units (points) per
// inch — TODO confirm. It is not referenced anywhere in the visible code;
// readBitBox spells the factor out as a literal 72.
const inch = 72.0
173
174 func parsePage(num int, p pdf.Page) []Inst {
175 content := p.Content()
176
177 var text []pdf.Text
178 for _, t := range content.Text {
179 text = append(text, t)
180 }
181
182 text = findWords(text)
183
184 if debugPage > 0 {
185 for _, t := range text {
186 fmt.Println(t)
187 }
188 for _, r := range content.Rect {
189 fmt.Println(r)
190 }
191 }
192
193
194
195
196
197
198
199
200 var insts []Inst
201 for {
202
203 for len(text) > 0 && !match(text[0], "Helvetica-BoldOblique", 11, "") && !match(text[0], "Arial,BoldItalic", 11, "") && !match(text[0], "Arial,BoldItalic", 10, "") {
204 text = text[1:]
205 }
206 if len(text) == 0 {
207 break
208 }
209 heading := text[0].S
210 text = text[1:]
211 for len(text) > 0 && (match(text[0], "Helvetica-BoldOblique", 11, "") || match(text[0], "Arial,BoldItalic", 11, "") || match(text[0], "Arial,BoldItalic", 10, "")) {
212 heading += " " + text[0].S
213 text = text[1:]
214 }
215 heading = strings.Replace(heading, "]", "] ", -1)
216 heading = strings.Replace(heading, " ", " ", -1)
217 heading = strings.Replace(heading, "rEVX-form", "r EVX-form", -1)
218 heading = strings.Replace(heading, "eX-form", "e X-form", -1)
219 heading = strings.Replace(heading, "mSD4-form", "m SD4-form", -1)
220 heading = strings.Replace(heading, "eSCI8-form", "e SCI8-form", -1)
221 heading = strings.TrimSpace(heading)
222 if isVLE(heading) {
223 continue
224 }
225
226
227 if len(text) == 0 || (!match(text[0], "Helvetica", 9, "") && !match(text[0], "Helvetica-BoldOblique", 9, "") && !match(text[0], "Arial", 9, "") && !match(text[0], "Arial", 10, "")) {
228 continue
229 }
230 mnemonic := ""
231 y := text[0].Y
232 x0 := text[0].X
233 for len(text) > 0 && (match(text[0], "Helvetica", 9, "") || match(text[0], "Helvetica-BoldOblique", 9, "") || match(text[0], "Arial", 9, "") || match(text[0], "Courier", 8, "") || match(text[0], "LucidaConsole", 7.17, "") || text[0].Y == y) {
234 if text[0].Y != y {
235 if math.Abs(text[0].X-x0) > 4 {
236 break
237 }
238 mnemonic += "\n"
239 y = text[0].Y
240 } else if mnemonic != "" {
241 mnemonic += " "
242 }
243 mnemonic += text[0].S
244 text = text[1:]
245 }
246
247
248 bits, i := readBitBox(heading, content, text, num)
249 if i == 0 {
250 continue
251 }
252
253 insts = append(insts, Inst{heading, mnemonic, bits})
254 }
255 return insts
256 }
257
// vle holds instruction headings that parsePage must skip. It starts with
// one hand-written entry; appendInstHeadings adds the headings found under
// the outline section for Variable Length Encoding (VLE).
var vle = []string{
	"System Call C-form,ESC-form",
}

// isVLE reports whether s is exactly equal to one of the entries in vle.
func isVLE(s string) bool {
	for i := 0; i < len(vle); i++ {
		if vle[i] == s {
			return true
		}
	}
	return false
}
270
271 func readBitBox(headline string, content pdf.Content, text []pdf.Text, pageNum int) (string, int) {
272
273 i := 0
274 if len(text) == 0 || (!match(text[i], "Helvetica", 9, "") && !match(text[i], "Helvetica", 7.26, "") && !match(text[i], "Arial", 9, "") && !match(text[i], "Arial", 7.98, "") && !match(text[i], "Arial", 7.2, "")) {
275 fmt.Fprintf(os.Stderr, "page %d: no bit fields for %q\n", pageNum, headline)
276 if len(text) > 0 {
277 fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[0])
278 }
279 return "", 0
280 }
281 sz := text[i].FontSize
282 y2 := text[i].Y
283 x2 := 0.0
284 for i < len(text) && text[i].Y == y2 {
285 if x2 < text[i].X+text[i].W {
286 x2 = text[i].X + text[i].W
287 }
288 i++
289 }
290 y2 += sz / 2
291
292
293 if i >= len(text) || text[i].S != "0" {
294 if headline == "Transaction Abort Doubleword Conditional X-form" {
295
296 return "31@0|TO@6|RA@11|RB@16|814@21|1@31|", i
297 }
298 if headline == "Add Scaled Immediate SCI8-form" {
299
300 return "06@0|RT@6|RA@11|8@16|Rc@20|F@21|SCL@22|UI8@24|", i
301 }
302 fmt.Fprintf(os.Stderr, "page %d: no bit numbers for %s\n", pageNum, headline)
303 if i < len(text) {
304 fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[i])
305 }
306 return "", 0
307 }
308 sz = text[i].FontSize
309 y1 := text[i].Y
310 x1 := text[i].X
311 for i < len(text) && text[i].Y == y1 {
312 if x2 < text[i].X+text[i].W {
313 x2 = text[i].X + text[i].W
314 }
315 i++
316 }
317
318 if debugPage > 0 {
319 fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], text[i])
320 }
321
322
323 var bottom, top pdf.Rect
324 const (
325 yMargin = 0.25 * 72
326 xMargin = 1 * 72
327 )
328 for _, r := range content.Rect {
329
330 if (x1 < 306) != (r.Max.X < 306) {
331 continue
332 }
333 if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
334 if y1-yMargin < r.Min.Y && r.Min.Y < y1 {
335 bottom = r
336 }
337 if y2 < r.Min.Y && r.Min.Y < y2+8 {
338 top = r
339 }
340 }
341 }
342
343 if bottom.Min.X == 0 {
344
345 for _, r := range content.Rect {
346
347 if (x1 < 306) != (r.Max.X < 306) {
348 continue
349 }
350 if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
351 if y1+sz/2 < r.Min.Y && r.Min.Y < y2 {
352 bottom = r
353 }
354 }
355 }
356 }
357
358 if debugPage > 0 {
359 fmt.Println("top", top, "bottom", bottom)
360 }
361
362 const ε = 0.1 * 72
363 var bars []pdf.Rect
364 for _, r := range content.Rect {
365 if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε && (bottom.Min.X < 306) == (r.Max.X < 306) {
366 bars = append(bars, r)
367 }
368 }
369 sort.Sort(RectHorizontal(bars))
370
371 out := ""
372 for i := 0; i < len(bars)-1; i++ {
373 var sub []pdf.Text
374 x1, x2 := bars[i].Min.X, bars[i+1].Min.X
375 for _, t := range content.Text {
376 tx := t.X + t.W/2
377 ty := t.Y + t.FontSize/4
378 if x1 < tx && tx < x2 && y1 < ty && ty < y2 {
379 sub = append(sub, t)
380 }
381 }
382 var str []string
383 for _, t := range findWords(sub) {
384 str = append(str, t.S)
385 }
386 s := strings.Join(str, "@")
387 out += s + "|"
388 }
389
390 if out == "" {
391 fmt.Fprintf(os.Stderr, "page %d: no bit encodings for %s\n", pageNum, headline)
392 }
393 return out, i
394 }
395
396 type RectHorizontal []pdf.Rect
397
398 func (x RectHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
399 func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X }
400 func (x RectHorizontal) Len() int { return len(x) }
401
402 func checkNoEncodings(num int, text []pdf.Text) {
403 for _, t := range text {
404 if match(t, "Helvetica-Bold", 9, "Encoding") {
405 fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S)
406 }
407 }
408 }
409
410 func match(t pdf.Text, font string, size float64, substr string) bool {
411 return t.Font == font && (size == 0 || math.Abs(t.FontSize-size) < 0.1) && strings.Contains(t.S, substr)
412 }
413
414 func findWords(chars []pdf.Text) (words []pdf.Text) {
415
416 const nudge = 1.5
417 sort.Sort(pdf.TextVertical(chars))
418 old := -100000.0
419 for i, c := range chars {
420 if c.Y != old && math.Abs(old-c.Y) < nudge {
421 chars[i].Y = old
422 } else {
423 old = c.Y
424 }
425 }
426
427
428
429 sort.Sort(pdf.TextVertical(chars))
430
431
432 for i := 0; i < len(chars); {
433
434 j := i + 1
435 for j < len(chars) && chars[j].Y == chars[i].Y {
436 j++
437 }
438 var end float64
439
440 for k := i; k < j; {
441 ck := &chars[k]
442 s := ck.S
443 end = ck.X + ck.W
444 charSpace := ck.FontSize / 6
445 wordSpace := ck.FontSize * 2 / 3
446 l := k + 1
447 for l < j {
448
449 cl := &chars[l]
450 if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
451 s += cl.S
452 end = cl.X + cl.W
453 l++
454 continue
455 }
456
457 if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
458 s += " " + cl.S
459 end = cl.X + cl.W
460 l++
461 continue
462 }
463 break
464 }
465 f := ck.Font
466 f = strings.TrimSuffix(f, ",Italic")
467 f = strings.TrimSuffix(f, "-Italic")
468 words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
469 k = l
470 }
471 i = j
472 }
473
474
475 var col1, col2 []pdf.Text
476 for _, w := range words {
477 if w.X > 306 {
478 col2 = append(col2, w)
479 } else {
480 col1 = append(col1, w)
481 }
482 }
483 return append(col1, col2...)
484 }
485
// sameFont reports whether text in fonts f1 and f2 may be merged into one
// word: either the names are equal once a trailing ",Italic" or "-Italic" is
// ignored, or one of them is "Symbol" or "TimesNewRoman", which are accepted
// next to any font.
//
// Earlier, f2 was derived from f1 by mistake, which made the name comparison
// always succeed and the function return true for every pair of fonts.
func sameFont(f1, f2 string) bool {
	f1 = strings.TrimSuffix(f1, ",Italic")
	f1 = strings.TrimSuffix(f1, "-Italic")
	f2 = strings.TrimSuffix(f2, ",Italic")
	f2 = strings.TrimSuffix(f2, "-Italic")
	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
}
493
// jsFix is a string replacer with no replacement pairs, so applying it
// returns its input unchanged. Nothing in the visible code uses it.
var jsFix = strings.NewReplacer()
500
// printTable is a stub: it ignores name and table and produces no output.
func printTable(name string, table []Inst) {
	// NOTE(review): this is the only visible reference to strconv, so it
	// appears to exist just to keep that import in use — confirm before
	// removing.
	_ = strconv.Atoi
}
504
View as plain text