...

Source file src/golang.org/x/text/width/gen.go

Documentation: golang.org/x/text/width

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  // This program generates the trie for width operations. The generated table
     8  // includes width category information as well as the normalization mappings.
     9  package main
    10  
    11  import (
    12  	"bytes"
    13  	"fmt"
    14  	"io"
    15  	"log"
    16  	"math"
    17  	"unicode/utf8"
    18  
    19  	"golang.org/x/text/internal/gen"
    20  	"golang.org/x/text/internal/triegen"
    21  )
    22  
    23  // See gen_common.go for flags.
    24  
    25  func main() {
    26  	gen.Init()
    27  	genTables()
    28  	genTests()
    29  	gen.Repackage("gen_trieval.go", "trieval.go", "width")
    30  	gen.Repackage("gen_common.go", "common_test.go", "width")
    31  }
    32  
    33  func genTables() {
    34  	t := triegen.NewTrie("width")
    35  	// fold and inverse mappings. See mapComment for a description of the format
    36  	// of each entry. Add dummy value to make an index of 0 mean no mapping.
    37  	inverse := [][4]byte{{}}
    38  	mapping := map[[4]byte]int{[4]byte{}: 0}
    39  
    40  	getWidthData(func(r rune, tag elem, alt rune) {
    41  		idx := 0
    42  		if alt != 0 {
    43  			var buf [4]byte
    44  			buf[0] = byte(utf8.EncodeRune(buf[1:], alt))
    45  			s := string(r)
    46  			buf[buf[0]] ^= s[len(s)-1]
    47  			var ok bool
    48  			if idx, ok = mapping[buf]; !ok {
    49  				idx = len(mapping)
    50  				if idx > math.MaxUint8 {
    51  					log.Fatalf("Index %d does not fit in a byte.", idx)
    52  				}
    53  				mapping[buf] = idx
    54  				inverse = append(inverse, buf)
    55  			}
    56  		}
    57  		t.Insert(r, uint64(tag|elem(idx)))
    58  	})
    59  
    60  	w := &bytes.Buffer{}
    61  	gen.WriteUnicodeVersion(w)
    62  
    63  	sz, err := t.Gen(w)
    64  	if err != nil {
    65  		log.Fatal(err)
    66  	}
    67  
    68  	sz += writeMappings(w, inverse)
    69  
    70  	fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024)
    71  
    72  	gen.WriteVersionedGoFile(*outputFile, "width", w.Bytes())
    73  }
    74  
    75  const inverseDataComment = `
    76  // inverseData contains 4-byte entries of the following format:
    77  //   <length> <modified UTF-8-encoded rune> <0 padding>
    78  // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
    79  // UTF-8 encoding of the original rune. Mappings often have the following
    80  // pattern:
    81  //   A -> A  (U+FF21 -> U+0041)
    82  //   B -> B  (U+FF22 -> U+0042)
    83  //   ...
    84  // By xor-ing the last byte the same entry can be shared by many mappings. This
    85  // reduces the total number of distinct entries by about two thirds.
    86  // The resulting entry for the aforementioned mappings is
    87  //   { 0x01, 0xE0, 0x00, 0x00 }
    88  // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
    89  //   E0 ^ A1 = 41.
    90  // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
    91  //   E0 ^ A2 = 42.
    92  // Note that because of the xor-ing, the byte sequence stored in the entry is
    93  // not valid UTF-8.`
    94  
    95  func writeMappings(w io.Writer, data [][4]byte) int {
    96  	fmt.Fprintln(w, inverseDataComment)
    97  	fmt.Fprintf(w, "var inverseData = [%d][4]byte{\n", len(data))
    98  	for _, x := range data {
    99  		fmt.Fprintf(w, "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x },\n", x[0], x[1], x[2], x[3])
   100  	}
   101  	fmt.Fprintln(w, "}")
   102  	return len(data) * 4
   103  }
   104  
   105  func genTests() {
   106  	w := &bytes.Buffer{}
   107  	fmt.Fprintf(w, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n")
   108  	getWidthData(func(r rune, tag elem, alt rune) {
   109  		if alt != 0 {
   110  			fmt.Fprintf(w, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag)
   111  		}
   112  	})
   113  	fmt.Fprintln(w, "}")
   114  	gen.WriteGoFile("runes_test.go", "width", w.Bytes())
   115  }
   116  

View as plain text