...

Source file src/golang.org/x/text/unicode/norm/triegen.go

Documentation: golang.org/x/text/unicode/norm

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  // Trie table generator.
     8  // Used by make*tables tools to generate a go file with trie data structures
     9  // for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
    10  // sequence are used to lookup offsets in the index table to be used for the
    11  // next byte. The last byte is used to index into a table with 16-bit values.
    12  
    13  package main
    14  
    15  import (
    16  	"fmt"
    17  	"io"
    18  )
    19  
    20  const maxSparseEntries = 16
    21  
    22  type normCompacter struct {
    23  	sparseBlocks [][]uint64
    24  	sparseOffset []uint16
    25  	sparseCount  int
    26  	name         string
    27  }
    28  
    29  func mostFrequentStride(a []uint64) int {
    30  	counts := make(map[int]int)
    31  	var v int
    32  	for _, x := range a {
    33  		if stride := int(x) - v; v != 0 && stride >= 0 {
    34  			counts[stride]++
    35  		}
    36  		v = int(x)
    37  	}
    38  	var maxs, maxc int
    39  	for stride, cnt := range counts {
    40  		if cnt > maxc || (cnt == maxc && stride < maxs) {
    41  			maxs, maxc = stride, cnt
    42  		}
    43  	}
    44  	return maxs
    45  }
    46  
    47  func countSparseEntries(a []uint64) int {
    48  	stride := mostFrequentStride(a)
    49  	var v, count int
    50  	for _, tv := range a {
    51  		if int(tv)-v != stride {
    52  			if tv != 0 {
    53  				count++
    54  			}
    55  		}
    56  		v = int(tv)
    57  	}
    58  	return count
    59  }
    60  
    61  func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
    62  	if n := countSparseEntries(v); n <= maxSparseEntries {
    63  		return (n+1)*4 + 2, true
    64  	}
    65  	return 0, false
    66  }
    67  
    68  func (c *normCompacter) Store(v []uint64) uint32 {
    69  	h := uint32(len(c.sparseOffset))
    70  	c.sparseBlocks = append(c.sparseBlocks, v)
    71  	c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
    72  	c.sparseCount += countSparseEntries(v) + 1
    73  	return h
    74  }
    75  
    76  func (c *normCompacter) Handler() string {
    77  	return c.name + "Sparse.lookup"
    78  }
    79  
    80  func (c *normCompacter) Print(w io.Writer) (retErr error) {
    81  	p := func(f string, x ...interface{}) {
    82  		if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
    83  			retErr = err
    84  		}
    85  	}
    86  
    87  	ls := len(c.sparseBlocks)
    88  	p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
    89  	p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
    90  
    91  	ns := c.sparseCount
    92  	p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
    93  	p("var %sSparseValues = [%d]valueRange {", c.name, ns)
    94  	for i, b := range c.sparseBlocks {
    95  		p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
    96  		var v int
    97  		stride := mostFrequentStride(b)
    98  		n := countSparseEntries(b)
    99  		p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
   100  		for i, nv := range b {
   101  			if int(nv)-v != stride {
   102  				if v != 0 {
   103  					p(",hi:%#02x},", 0x80+i-1)
   104  				}
   105  				if nv != 0 {
   106  					p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
   107  				}
   108  			}
   109  			v = int(nv)
   110  		}
   111  		if v != 0 {
   112  			p(",hi:%#02x},", 0x80+len(b)-1)
   113  		}
   114  	}
   115  	p("\n}\n\n")
   116  	return
   117  }
   118  

View as plain text