...

Source file src/golang.org/x/text/internal/triegen/print.go

Documentation: golang.org/x/text/internal/triegen

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package triegen
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  	"text/template"
    13  )
    14  
    15  // print writes all the data structures as well as the code necessary to use the
    16  // trie to w.
    17  func (b *builder) print(w io.Writer) error {
    18  	b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
    19  	b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
    20  	b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
    21  	b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
    22  	b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
    23  
    24  	// If we only have one root trie, all starter blocks are at position 0 and
    25  	// we can access the arrays directly.
    26  	if len(b.Trie) == 1 {
    27  		// At this point we cannot refer to the generated tables directly.
    28  		b.ASCIIBlock = b.Name + "Values"
    29  		b.StarterBlock = b.Name + "Index"
    30  	} else {
    31  		// Otherwise we need to have explicit starter indexes in the trie
    32  		// structure.
    33  		b.ASCIIBlock = "t.ascii"
    34  		b.StarterBlock = "t.utf8Start"
    35  	}
    36  
    37  	b.SourceType = "[]byte"
    38  	if err := lookupGen.Execute(w, b); err != nil {
    39  		return err
    40  	}
    41  
    42  	b.SourceType = "string"
    43  	if err := lookupGen.Execute(w, b); err != nil {
    44  		return err
    45  	}
    46  
    47  	if err := trieGen.Execute(w, b); err != nil {
    48  		return err
    49  	}
    50  
    51  	for _, c := range b.Compactions {
    52  		if err := c.c.Print(w); err != nil {
    53  			return err
    54  		}
    55  	}
    56  
    57  	return nil
    58  }
    59  
    60  func printValues(n int, values []uint64) string {
    61  	w := &bytes.Buffer{}
    62  	boff := n * blockSize
    63  	fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
    64  	var newline bool
    65  	for i, v := range values {
    66  		if i%6 == 0 {
    67  			newline = true
    68  		}
    69  		if v != 0 {
    70  			if newline {
    71  				fmt.Fprintf(w, "\n")
    72  				newline = false
    73  			}
    74  			fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
    75  		}
    76  	}
    77  	return w.String()
    78  }
    79  
    80  func printIndex(b *builder, nr int, n *node) string {
    81  	w := &bytes.Buffer{}
    82  	boff := nr * blockSize
    83  	fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
    84  	var newline bool
    85  	for i, c := range n.children {
    86  		if i%8 == 0 {
    87  			newline = true
    88  		}
    89  		if c != nil {
    90  			v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
    91  			if v != 0 {
    92  				if newline {
    93  					fmt.Fprintf(w, "\n")
    94  					newline = false
    95  				}
    96  				fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
    97  			}
    98  		}
    99  	}
   100  	return w.String()
   101  }
   102  
   103  var (
   104  	trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
   105  		"printValues": printValues,
   106  		"printIndex":  printIndex,
   107  		"title":       strings.Title,
   108  		"dec":         func(x int) int { return x - 1 },
   109  		"psize": func(n int) string {
   110  			return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
   111  		},
   112  	}).Parse(trieTemplate))
   113  	lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
   114  )
   115  
   116  // TODO: consider the return type of lookup. It could be uint64, even if the
   117  // internal value type is smaller. We will have to verify this with the
   118  // performance of unicode/norm, which is very sensitive to such changes.
   119  const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
   120  // {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
   121  type {{.Name}}Trie struct { {{if $multi}}
   122  	ascii []{{.ValueType}} // index for ASCII bytes
   123  	utf8Start  []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
   124  {{end}}}
   125  
   126  func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
   127  	h := {{.Name}}TrieHandles[i]
   128  	return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
   129  }
   130  
   131  type {{.Name}}TrieHandle struct {
   132  	ascii, multi {{.IndexType}}
   133  }
   134  
   135  // {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
   136  var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
   137  {{range .Trie}}	{ {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
   138  {{end}}}{{else}}
   139  	return &{{.Name}}Trie{}
   140  }
   141  {{end}}
   142  // lookupValue determines the type of block n and looks up the value for b.
   143  func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
   144  	switch { {{range $i, $c := .Compactions}}
   145  		{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
   146  			n -= {{$c.Offset}}{{end}}
   147  			return {{print $b.ValueType}}({{$c.Handler}}){{end}}
   148  	}
   149  }
   150  
   151  // {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
   152  // The third block is the zero block.
   153  var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
   154  {{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
   155  {{end}}}
   156  
   157  // {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
   158  // Block 0 is the zero block.
   159  var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
   160  {{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
   161  {{end}}}
   162  `
   163  
   164  // TODO: consider allowing zero-length strings after evaluating performance with
   165  // unicode/norm.
   166  const lookupTemplate = `
   167  // lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
   168  // the width in bytes of this encoding. The size will be 0 if s does not
   169  // hold enough bytes to complete the encoding. len(s) must be greater than 0.
   170  func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
   171  	c0 := s[0]
   172  	switch {
   173  	case c0 < 0x80: // is ASCII
   174  		return {{.ASCIIBlock}}[c0], 1
   175  	case c0 < 0xC2:
   176  		return 0, 1  // Illegal UTF-8: not a starter, not ASCII.
   177  	case c0 < 0xE0: // 2-byte UTF-8
   178  		if len(s) < 2 {
   179  			return 0, 0
   180  		}
   181  		i := {{.StarterBlock}}[c0]
   182  		c1 := s[1]
   183  		if c1 < 0x80 || 0xC0 <= c1 {
   184  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   185  		}
   186  		return t.lookupValue(uint32(i), c1), 2
   187  	case c0 < 0xF0: // 3-byte UTF-8
   188  		if len(s) < 3 {
   189  			return 0, 0
   190  		}
   191  		i := {{.StarterBlock}}[c0]
   192  		c1 := s[1]
   193  		if c1 < 0x80 || 0xC0 <= c1 {
   194  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   195  		}
   196  		o := uint32(i)<<6 + uint32(c1)
   197  		i = {{.Name}}Index[o]
   198  		c2 := s[2]
   199  		if c2 < 0x80 || 0xC0 <= c2 {
   200  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   201  		}
   202  		return t.lookupValue(uint32(i), c2), 3
   203  	case c0 < 0xF8: // 4-byte UTF-8
   204  		if len(s) < 4 {
   205  			return 0, 0
   206  		}
   207  		i := {{.StarterBlock}}[c0]
   208  		c1 := s[1]
   209  		if c1 < 0x80 || 0xC0 <= c1 {
   210  			return 0, 1 // Illegal UTF-8: not a continuation byte.
   211  		}
   212  		o := uint32(i)<<6 + uint32(c1)
   213  		i = {{.Name}}Index[o]
   214  		c2 := s[2]
   215  		if c2 < 0x80 || 0xC0 <= c2 {
   216  			return 0, 2 // Illegal UTF-8: not a continuation byte.
   217  		}
   218  		o = uint32(i)<<6 + uint32(c2)
   219  		i = {{.Name}}Index[o]
   220  		c3 := s[3]
   221  		if c3 < 0x80 || 0xC0 <= c3 {
   222  			return 0, 3 // Illegal UTF-8: not a continuation byte.
   223  		}
   224  		return t.lookupValue(uint32(i), c3), 4
   225  	}
   226  	// Illegal rune
   227  	return 0, 1
   228  }
   229  
   230  // lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
   231  // s must start with a full and valid UTF-8 encoded rune.
   232  func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
   233  	c0 := s[0]
   234  	if c0 < 0x80 { // is ASCII
   235  		return {{.ASCIIBlock}}[c0]
   236  	}
   237  	i := {{.StarterBlock}}[c0]
   238  	if c0 < 0xE0 { // 2-byte UTF-8
   239  		return t.lookupValue(uint32(i), s[1])
   240  	}
   241  	i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
   242  	if c0 < 0xF0 { // 3-byte UTF-8
   243  		return t.lookupValue(uint32(i), s[2])
   244  	}
   245  	i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
   246  	if c0 < 0xF8 { // 4-byte UTF-8
   247  		return t.lookupValue(uint32(i), s[3])
   248  	}
   249  	return 0
   250  }
   251  `
   252  

View as plain text