...

Source file src/golang.org/x/arch/arm64/arm64spec/spec.go

Documentation: golang.org/x/arch/arm64/arm64spec

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
// arm64spec reads the ``ARMv8-A Reference Manual''
// to collect instruction encoding details and writes those
// details in JSON format to the file inst.json in the
// current directory.
// usage: arm64spec file.pdf
     9  
    10  package main
    11  
    12  import (
    13  	"bufio"
    14  	"bytes"
    15  	"encoding/json"
    16  	"fmt"
    17  	"log"
    18  	"math"
    19  	"os"
    20  	"regexp"
    21  	"sort"
    22  	"strconv"
    23  	"strings"
    24  
    25  	"rsc.io/pdf"
    26  )
    27  
    28  type Inst struct {
    29  	Name   string
    30  	Bits   string
    31  	Arch   string
    32  	Syntax string
    33  	Code   string
    34  	Alias  string
    35  }
    36  
// debugPage selects a single page to process. When it is greater than zero,
// main skips every other page and the parsing code prints extra diagnostics
// to standard output; when it is zero, all pages in main's range are
// processed and the instruction count and missing-instruction report are
// printed.
const debugPage = 0

// stdout is not assigned or read anywhere in the code visible here.
// NOTE(review): looks like a leftover; confirm before removing.
var stdout *bufio.Writer
    40  
    41  func check(e error) {
    42  	if e != nil {
    43  		panic(e)
    44  	}
    45  }
    46  
    47  func main() {
    48  	log.SetFlags(0)
    49  	log.SetPrefix("arm64spec: ")
    50  
    51  	if len(os.Args) != 2 {
    52  		fmt.Fprintf(os.Stderr, "usage: arm64spec file.pdf\n")
    53  		os.Exit(2)
    54  	}
    55  	f, err := pdf.Open(os.Args[1])
    56  	if err != nil {
    57  		log.Fatal(err)
    58  	}
    59  
    60  	// Find instruction set reference in outline, to build instruction list.
    61  	instList := instHeadings(f.Outline())
    62  	if debugPage == 0 {
    63  		fmt.Println("the number of instructions:", len(instList))
    64  	}
    65  	if len(instList) < 200 {
    66  		log.Fatalf("only found %d instructions in table of contents", len(instList))
    67  	}
    68  
    69  	file, err := os.Create("inst.json")
    70  	check(err)
    71  	w := bufio.NewWriter(file)
    72  	_, err = w.WriteString("[")
    73  	check(err)
    74  	numTable := 0
    75  	defer w.Flush()
    76  	defer file.Close()
    77  
    78  	// Scan document looking for instructions.
    79  	// Must find exactly the ones in the outline.
    80  	n := f.NumPage()
    81  PageLoop:
    82  	for pageNum := 435; pageNum <= n; pageNum++ {
    83  		if debugPage > 0 && pageNum != debugPage {
    84  			continue
    85  		}
    86  		if pageNum == 770 {
    87  			continue
    88  		}
    89  		if pageNum > 1495 {
    90  			break
    91  		}
    92  		p := f.Page(pageNum)
    93  		name, table := parsePage(pageNum, p, f)
    94  		if name == "" {
    95  			continue
    96  		}
    97  		if len(table) < 1 {
    98  			if false {
    99  				fmt.Fprintf(os.Stderr, "no encodings for instruction %q (page %d)\n", name, pageNum)
   100  			}
   101  			continue
   102  		}
   103  		for _, inst := range table {
   104  			if numTable > 0 {
   105  				_, err = w.WriteString(jsFix.Replace(","))
   106  				check(err)
   107  				_, err = w.WriteString("\n")
   108  				check(err)
   109  			}
   110  			numTable++
   111  			js, _ := json.Marshal(inst)
   112  			_, err = w.WriteString(jsFix.Replace(string(js)))
   113  			check(err)
   114  		}
   115  		for j, headline := range instList {
   116  			if name == headline {
   117  				instList[j] = ""
   118  				continue PageLoop
   119  			}
   120  		}
   121  		fmt.Fprintf(os.Stderr, "unexpected instruction %q (page %d)\n", name, pageNum)
   122  	}
   123  
   124  	_, err = w.WriteString("\n]\n")
   125  	check(err)
   126  	w.Flush()
   127  
   128  	if debugPage == 0 {
   129  		for _, headline := range instList {
   130  			if headline != "" {
   131  				fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
   132  			}
   133  		}
   134  	}
   135  }
   136  
   137  func instHeadings(outline pdf.Outline) []string {
   138  	return appendInstHeadings(outline, nil)
   139  }
   140  
   141  var instRE = regexp.MustCompile(`C[\d.]+ Alphabetical list of A64 base instructions`)
   142  var instRE_A = regexp.MustCompile(`C[\d.]+ Alphabetical list of A64 floating-point and Advanced SIMD instructions`)
   143  var childRE = regexp.MustCompile(`C[\d.]+ (.+)`)
   144  var sectionRE = regexp.MustCompile(`^C[\d.]+$`)
   145  var bitRE = regexp.MustCompile(`^( |[01]|\([01]\))*$`)
   146  var IMMRE = regexp.MustCompile(`^imm[\d]+$`)
   147  
   148  func appendInstHeadings(outline pdf.Outline, list []string) []string {
   149  	if instRE.MatchString(outline.Title) || instRE_A.MatchString(outline.Title) {
   150  		for _, child := range outline.Child {
   151  			m := childRE.FindStringSubmatch(child.Title)
   152  			if m == nil {
   153  				fmt.Fprintf(os.Stderr, "cannot parse section title: %s\n", child.Title)
   154  				continue
   155  			}
   156  			list = append(list, m[1])
   157  		}
   158  	}
   159  	for _, child := range outline.Child {
   160  		list = appendInstHeadings(child, list)
   161  	}
   162  	return list
   163  }
   164  
// inch is 72.0; it is not referenced in the code visible here, where the
// literal 72 is used directly instead.
// NOTE(review): presumably the number of PDF points per inch; confirm.
const inch = 72.0
   166  
   167  func parsePage(num int, p pdf.Page, f *pdf.Reader) (name string, table []Inst) {
   168  	content := p.Content()
   169  	var text []pdf.Text
   170  	CrossTwoPage := true
   171  	for _, t := range content.Text {
   172  		text = append(text, t)
   173  	}
   174  	text = findWords(text)
   175  	if !(instRE.MatchString(text[1].S) || instRE_A.MatchString(text[1].S)) || len(text) == 0 || !sectionRE.MatchString(text[2].S) {
   176  		return "", nil
   177  	}
   178  	// Check whether the content crosses the page.
   179  	for _, t := range text {
   180  		if match(t, "Arial,Bold", 10, "Assembler symbols") {
   181  			CrossTwoPage = false
   182  			break
   183  		}
   184  	}
   185  	// Deal with cross page issue. To the next page content.
   186  	var Ncontent pdf.Content
   187  	Npagebox := false
   188  	CrossThreePage := false
   189  	Noffset := ""
   190  	if CrossTwoPage == true {
   191  		Np := f.Page(num + 1)
   192  		Ncontent = Np.Content()
   193  		var Ntext []pdf.Text
   194  		for _, t := range Ncontent.Text {
   195  			Ntext = append(Ntext, t)
   196  		}
   197  		Ntext = findWords(Ntext)
   198  		if len(Ntext) == 0 || sectionRE.MatchString(Ntext[2].S) {
   199  			Ntext = text[:0]
   200  		} else {
   201  			for _, t := range Ntext {
   202  				if match(t, "Arial,Bold", 10, "offset") {
   203  					Noffset = t.S
   204  					Npagebox = true
   205  				}
   206  				// This istruction cross three pages.
   207  				if match(t, "Arial,Bold", 10, "Assembler symbols") {
   208  					CrossThreePage = false
   209  				} else {
   210  					CrossThreePage = true
   211  				}
   212  				text = append(text, t)
   213  			}
   214  		}
   215  	}
   216  	if CrossThreePage == true {
   217  		NNp := f.Page(num + 2)
   218  		NNcontent := NNp.Content()
   219  		var NNtext []pdf.Text
   220  		for _, t := range NNcontent.Text {
   221  			NNtext = append(NNtext, t)
   222  		}
   223  		NNtext = findWords(NNtext)
   224  		if len(NNtext) == 0 || sectionRE.MatchString(NNtext[2].S) {
   225  			NNtext = text[:0]
   226  		} else {
   227  			for _, t := range NNtext {
   228  				text = append(text, t)
   229  			}
   230  		}
   231  	}
   232  	// Get alias and remove text we should ignore.
   233  	out := text[:0]
   234  	alias := ""
   235  	for _, t := range text {
   236  		if strings.Contains(t.S, "instruction is used by the alias") || strings.Contains(t.S, "instruction is an alias of") {
   237  			alias_t := strings.SplitAfter(t.S, ".")
   238  			alias = alias_t[0]
   239  		}
   240  		// Skip page footer
   241  		if match(t, "Arial-ItalicMT", 8, "") || match(t, "ArialMT", 8, "") {
   242  			if debugPage > 0 {
   243  				fmt.Println("==the skip page footer is:==", t)
   244  			}
   245  			continue
   246  		}
   247  		// Skip the body text
   248  		if match(t, "TimesNewRoman", 9, "") || match(t, "TimesNewRomanPS-ItalicMT", 9, "") {
   249  			if debugPage > 0 {
   250  				fmt.Println("==the skip body text is:==", t)
   251  			}
   252  			continue
   253  		}
   254  		out = append(out, t)
   255  	}
   256  	text = out
   257  	// Page header must be child title.
   258  	if len(text) == 0 || !sectionRE.MatchString(text[0].S) {
   259  		return "", nil
   260  	}
   261  
   262  	name = text[1].S
   263  	inst := Inst{
   264  		Name:  name,
   265  		Alias: alias,
   266  	}
   267  	text = text[2:]
   268  	// Skip body text before bits.
   269  	OffsetMark := false
   270  	k := 0
   271  	for k = 0; k < len(text); {
   272  		if !match(text[k], "Arial", 8, "31") {
   273  			k++
   274  		} else {
   275  			break
   276  		}
   277  	}
   278  	// Check offset.
   279  	if k > 0 && match(text[k-1], "Arial,Bold", 10, "") {
   280  		OffsetMark = true
   281  		text = text[k-1:]
   282  	} else {
   283  		text = text[k:]
   284  	}
   285  	// Encodings follow.
   286  	BitMark := false
   287  	bits := ""
   288  	// Find bits.
   289  	for i := 0; i < len(text); {
   290  		inst.Bits = ""
   291  		offset := ""
   292  		abits := ""
   293  		// Read bits only one time.
   294  		if OffsetMark == true {
   295  			for i < len(text) && !match(text[i], "Arial", 8, "") {
   296  				i++
   297  			}
   298  			if i < len(text) {
   299  				offset = text[i-1].S
   300  				BitMark = false
   301  				bits = ""
   302  			} else {
   303  				break
   304  			}
   305  		}
   306  		if BitMark == false {
   307  			if Npagebox == true && Noffset == offset {
   308  				bits, i = readBitBox(name, Ncontent, text, i)
   309  			} else {
   310  				bits, i = readBitBox(name, content, text, i)
   311  			}
   312  			BitMark = true
   313  			// Every time, get "then SEE" after get bits.
   314  			enc := false
   315  			if i < len(text)-1 {
   316  				m := i
   317  				for m < len(text)-1 && !match(text[m], "Arial-BoldItalicMT", 9, "encoding") {
   318  					m++
   319  				}
   320  				if match(text[m], "Arial-BoldItalicMT", 9, "encoding") && m < len(text) {
   321  					enc = true
   322  					m = m + 1
   323  				}
   324  				if enc == true {
   325  					for m < len(text) && !match(text[m], "Arial,Bold", 10, "") && match(text[m], "LucidaSansTypewriteX", 6.48, "") {
   326  						if strings.Contains(text[m].S, "then SEE") {
   327  							inst.Code = text[m].S
   328  							break
   329  						} else {
   330  							m++
   331  						}
   332  					}
   333  				}
   334  			}
   335  		}
   336  
   337  		// Possible subarchitecture notes.
   338  	ArchLoop:
   339  		for i < len(text) {
   340  			if !match(text[i], "Arial-BoldItalicMT", 9, "variant") || match(text[i], "Arial-BoldItalicMT", 9, "encoding") {
   341  				i++
   342  				continue
   343  			}
   344  			inst.Arch = ""
   345  			inst.Arch += offset
   346  			inst.Arch += " "
   347  			inst.Arch += text[i].S
   348  			inst.Arch = strings.TrimSpace(inst.Arch)
   349  			i++
   350  			// Encoding syntaxes.
   351  			sign := ""
   352  			SynMark := false
   353  			for i < len(text) && match(text[i], "LucidaSansTypewriteX", 6.48, "") && SynMark == false {
   354  				if (strings.Contains(text[i].S, "==") || strings.Contains(text[i].S, "!=")) && SynMark == false {
   355  					sign = text[i].S
   356  					i++
   357  					continue
   358  				}
   359  				// Avoid "equivalent to" another syntax.
   360  				if SynMark == false {
   361  					SynMark = true
   362  					inst.Syntax = ""
   363  					inst.Syntax = text[i].S
   364  					i++
   365  				}
   366  			}
   367  			abits = bits
   368  			// Analyse and replace some bits value.eg, sf==1
   369  			if strings.Contains(sign, "&&") {
   370  				split := strings.Split(sign, "&&")
   371  				for k := 0; k < len(split); {
   372  					if strings.Contains(split[k], "==") && !strings.Contains(split[k], "!") {
   373  						tmp := strings.Split(split[k], "==")
   374  						prefix := strings.TrimSpace(tmp[0])
   375  						value := strings.TrimSpace(tmp[1])
   376  						if strings.Contains(bits, prefix) && !strings.Contains(value, "x") {
   377  							abits = strings.Replace(abits, prefix, value, -1)
   378  						}
   379  					}
   380  					k++
   381  				}
   382  			} else if strings.Contains(sign, "==") && !strings.Contains(sign, "!") {
   383  				split := strings.Split(sign, "==")
   384  				prefix := strings.TrimSpace(split[0])
   385  				value := strings.TrimSpace(split[1])
   386  				if strings.Contains(bits, prefix) && !strings.Contains(value, "x") {
   387  					abits = strings.Replace(abits, prefix, value, -1)
   388  				}
   389  			}
   390  			// Deal with syntax contains {2}
   391  			if strings.Contains(inst.Syntax, "{2}") {
   392  				if !strings.Contains(abits, "Q") {
   393  					fmt.Fprintf(os.Stderr, "instruction%s - syntax%s: is wrong!!\n", name, inst.Syntax)
   394  				}
   395  				syn := inst.Syntax
   396  				bits := abits
   397  				for i := 0; i < 2; {
   398  					if i == 0 {
   399  						inst.Bits = strings.Replace(bits, "Q", "0", -1)
   400  						inst.Syntax = strings.Replace(syn, "{2}", "", -1)
   401  						table = append(table, inst)
   402  					}
   403  					if i == 1 {
   404  						inst.Bits = strings.Replace(bits, "Q", "1", -1)
   405  						inst.Syntax = strings.Replace(syn, "{2}", "2", -1)
   406  						table = append(table, inst)
   407  					}
   408  					i++
   409  				}
   410  			} else {
   411  				inst.Bits = abits
   412  				table = append(table, inst)
   413  			}
   414  
   415  			if OffsetMark == true && i < len(text) && match(text[i], "Arial-BoldItalicMT", 9, "variant") && !match(text[i], "Arial-BoldItalicMT", 9, "encoding") {
   416  				continue ArchLoop
   417  			} else {
   418  				break
   419  			}
   420  		}
   421  	}
   422  	return name, table
   423  }
   424  
   425  func readBitBox(name string, content pdf.Content, text []pdf.Text, i int) (string, int) {
   426  	// Bits headings
   427  	y3 := 0.0
   428  	x1 := 0.0
   429  	for i < len(text) && match(text[i], "Arial", 8, "") {
   430  		if y3 == 0 {
   431  			y3 = text[i].Y
   432  		}
   433  		if x1 == 0 {
   434  			x1 = text[i].X
   435  		}
   436  		if text[i].Y != y3 {
   437  			break
   438  		}
   439  		i++
   440  	}
   441  	// Bits fields in box
   442  	x2 := 0.0
   443  	y2 := 0.0
   444  	dy1 := 0.0
   445  	for i < len(text) && match(text[i], "Arial", 8, "") {
   446  		if x2 < text[i].X+text[i].W {
   447  			x2 = text[i].X + text[i].W
   448  		}
   449  		if y2 == 0 {
   450  			y2 = text[i].Y
   451  		}
   452  		if text[i].Y != y2 {
   453  			break
   454  		}
   455  		dy1 = text[i].FontSize
   456  		i++
   457  	}
   458  	// Bits fields below box
   459  	x3 := 0.0
   460  	y1 := 0.0
   461  	for i < len(text) && match(text[i], "Arial", 8, "") {
   462  		if x3 < text[i].X+text[i].W {
   463  			x3 = text[i].X + text[i].W
   464  		}
   465  		y1 = text[i].Y
   466  		if text[i].Y != y1 {
   467  			break
   468  		}
   469  		i++
   470  	}
   471  	//no bits fields below box
   472  	below_flag := true
   473  	if y1 == 0.0 {
   474  		below_flag = false
   475  		y1 = y2
   476  	}
   477  	// Encoding box
   478  	if debugPage > 0 {
   479  		fmt.Println("encoding box", x1, y3, x2, y1)
   480  	}
   481  
   482  	// Find lines (thin rectangles) separating bit fields.
   483  	var bottom, top pdf.Rect
   484  	const (
   485  		yMargin = 0.25 * 72
   486  		xMargin = 2 * 72
   487  	)
   488  	cont := 0
   489  	if below_flag == true {
   490  		for _, r := range content.Rect {
   491  			cont = cont + 1
   492  			if x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
   493  				if y1-yMargin < r.Min.Y && r.Min.Y < y2-dy1 {
   494  					bottom = r
   495  				}
   496  				if y2+dy1 < r.Min.Y && r.Min.Y < y3+yMargin {
   497  					top = r
   498  				}
   499  			}
   500  		}
   501  	} else {
   502  		for _, r := range content.Rect {
   503  			cont = cont + 1
   504  			if x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin {
   505  				if y1-yMargin-dy1 < r.Min.Y && r.Min.Y < y3-dy1 {
   506  					bottom = r
   507  				}
   508  				if y2+dy1 < r.Min.Y && r.Min.Y < y3+yMargin {
   509  					top = r
   510  				}
   511  			}
   512  		}
   513  	}
   514  
   515  	if debugPage > 0 {
   516  		fmt.Println("top", top, "bottom", bottom, "content.Rect number", cont)
   517  	}
   518  
   519  	const ε = 0.5 * 72
   520  	cont_1 := 0
   521  	var bars []pdf.Rect
   522  	for _, r := range content.Rect {
   523  		if math.Abs(r.Min.X-r.Max.X) < bottom.Max.X-bottom.Min.X-(ε/2) && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε {
   524  			cont_1 = cont_1 + 1
   525  			bars = append(bars, r)
   526  		}
   527  	}
   528  	sort.Sort(RectHorizontal(bars))
   529  	if debugPage > 0 {
   530  		fmt.Println("==bars number==", cont_1)
   531  	}
   532  
   533  	// There are 16-bit and 32-bit encodings.
   534  	// In practice, they are about 2.65 and 5.3 inches wide, respectively.
   535  	// Use 4 inches as a cutoff.
   536  	nbit := 32
   537  	dx := top.Max.X - top.Min.X
   538  	if top.Max.X-top.Min.X < 4*72 {
   539  		nbit = 16
   540  	}
   541  
   542  	total := 0
   543  	var buf bytes.Buffer
   544  	for i := 0; i < len(bars); i++ {
   545  		if i > 0 {
   546  			fmt.Fprintf(&buf, "|")
   547  		}
   548  		var sub []pdf.Text
   549  		x1, x2 := bars[i].Min.X, bars[i].Max.X
   550  		for _, t := range content.Text {
   551  			tx := t.X + t.W/2
   552  			ty := t.Y
   553  			if x1 < tx && tx < x2 && y2-dy1 < ty && ty < y2+dy1 {
   554  				sub = append(sub, t)
   555  			}
   556  		}
   557  		var str []string
   558  		for _, t := range findWords(sub) {
   559  			str = append(str, t.S)
   560  		}
   561  		s := strings.Join(str, " ")
   562  		s = strings.Replace(s, ")(", ") (", -1)
   563  
   564  		// If bits contain "!" or "x", be replaced by the bits below it.
   565  		if strings.Contains(s, "!") || strings.Contains(s, "x") {
   566  			var sub1 []pdf.Text
   567  			for _, t := range content.Text {
   568  				tx := t.X + t.W/2
   569  				ty := t.Y
   570  				if x1 < tx && tx < x2 && y1-dy1 < ty && ty < y1+dy1 {
   571  					sub1 = append(sub1, t)
   572  				}
   573  
   574  			}
   575  			var str1 []string
   576  			for _, t := range findWords(sub1) {
   577  				str1 = append(str1, t.S)
   578  			}
   579  			s = strings.Join(str1, " ")
   580  			s = strings.Replace(s, ")(", ") (", -1)
   581  		}
   582  
   583  		n := len(strings.Fields(s))
   584  
   585  		var b int
   586  		if IMMRE.MatchString(s) {
   587  			bitNum := strings.TrimPrefix(s, "imm")
   588  			b, _ = strconv.Atoi(bitNum)
   589  		} else if s == "immhi" {
   590  			b = 19
   591  		} else {
   592  			b = int(float64(nbit)*(x2-x1)/dx + 0.5)
   593  		}
   594  		if n == b {
   595  			for k, f := range strings.Fields(s) {
   596  				if k > 0 {
   597  					fmt.Fprintf(&buf, "|")
   598  				}
   599  				fmt.Fprintf(&buf, "%s", f)
   600  			}
   601  		} else {
   602  			if n != 1 {
   603  				fmt.Fprintf(os.Stderr, "%s - multi-field %d-bit encoding: %s\n", name, n, s)
   604  			}
   605  			fmt.Fprintf(&buf, "%s:%d", s, b)
   606  		}
   607  		total += b
   608  	}
   609  
   610  	if total != nbit || total == 0 {
   611  		fmt.Fprintf(os.Stderr, "%s - %d-bit encoding\n", name, total)
   612  	}
   613  	return buf.String(), i
   614  }
   615  
   616  type RectHorizontal []pdf.Rect
   617  
   618  func (x RectHorizontal) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   619  func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X }
   620  func (x RectHorizontal) Len() int           { return len(x) }
   621  
   622  func checkNoEncodings(num int, text []pdf.Text) {
   623  	for _, t := range text {
   624  		if match(t, "Helvetica-Bold", 9, "Encoding") {
   625  			fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S)
   626  		}
   627  	}
   628  }
   629  
   630  func match(t pdf.Text, font string, size float64, substr string) bool {
   631  	return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr)
   632  }
   633  
   634  func findWords(chars []pdf.Text) (words []pdf.Text) {
   635  	// Sort by Y coordinate and normalize.
   636  	const nudge = 1
   637  	sort.Sort(pdf.TextVertical(chars))
   638  	old := -100000.0
   639  	for i, c := range chars {
   640  		if c.Y != old && math.Abs(old-c.Y) < nudge {
   641  			chars[i].Y = old
   642  		} else {
   643  			old = c.Y
   644  		}
   645  	}
   646  
   647  	// Sort by Y coordinate, breaking ties with X.
   648  	// This will bring letters in a single word together.
   649  	sort.Sort(pdf.TextVertical(chars))
   650  
   651  	// Loop over chars.
   652  	for i := 0; i < len(chars); {
   653  		// Find all chars on line.
   654  		j := i + 1
   655  		for j < len(chars) && chars[j].Y == chars[i].Y {
   656  			j++
   657  		}
   658  		var end float64
   659  		// Split line into words (really, phrases).
   660  		for k := i; k < j; {
   661  			ck := &chars[k]
   662  			s := ck.S
   663  			end = ck.X + ck.W
   664  			charSpace := ck.FontSize / 6
   665  			wordSpace := ck.FontSize * 2 / 3
   666  			l := k + 1
   667  			for l < j {
   668  				// Grow word.
   669  				cl := &chars[l]
   670  				if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
   671  					s += cl.S
   672  					end = cl.X + cl.W
   673  					l++
   674  					continue
   675  				}
   676  				// Add space to phrase before next word.
   677  				if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
   678  					s += " " + cl.S
   679  					end = cl.X + cl.W
   680  					l++
   681  					continue
   682  				}
   683  				break
   684  			}
   685  			f := ck.Font
   686  			f = strings.TrimSuffix(f, ",Italic")
   687  			f = strings.TrimSuffix(f, "-Italic")
   688  			words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
   689  			k = l
   690  		}
   691  		i = j
   692  	}
   693  
   694  	return words
   695  }
   696  
   697  func sameFont(f1, f2 string) bool {
   698  	f1 = strings.TrimSuffix(f1, ",Italic")
   699  	f1 = strings.TrimSuffix(f1, "-Italic")
   700  	f2 = strings.TrimSuffix(f1, ",Italic")
   701  	f2 = strings.TrimSuffix(f1, "-Italic")
   702  	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
   703  }
   704  
// jsFix rewrites escape sequences in marshaled JSON text: the \u003c, \u003e
// and \u0026 escapes become the literal characters <, > and &, and \u0009
// becomes the two-character escape \t. main applies it to the output of
// json.Marshal before writing.
var jsFix = strings.NewReplacer(
	`\u003c`, `<`,
	`\u003e`, `>`,
	`\u0026`, `&`,
	`\u0009`, `\t`,
)
   711  
// printTable is a placeholder: it ignores name and table and produces no
// output. Its body only references strconv.Atoi.
func printTable(name string, table []Inst) {
	_ = strconv.Atoi
}
   715  

View as plain text