1
2
3
4
5
6
7
8
9
10
11
12 package main
13
14 import (
15 "fmt"
16 "io"
17 "log"
18 "unicode"
19 "unicode/utf8"
20
21 "golang.org/x/text/internal/gen"
22 "golang.org/x/text/internal/triegen"
23 "golang.org/x/text/internal/ucd"
24 "golang.org/x/text/unicode/bidi"
25 )
26
27 func main() {
28 gen.Init()
29 genTables()
30 gen.Repackage("gen_trieval.go", "trieval.go", "idna")
31 gen.Repackage("gen_common.go", "common_test.go", "idna")
32 }
33
34 var runes = map[rune]info{}
35
36 func genTables() {
37 t := triegen.NewTrie("idna")
38
39 ucd.Parse(gen.OpenUCDFile("DerivedNormalizationProps.txt"), func(p *ucd.Parser) {
40 r := p.Rune(0)
41 if p.String(1) == "NFC_QC" {
42 runes[r] = mayNeedNorm
43 }
44 })
45 ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
46 r := p.Rune(0)
47
48 const cccVirama = 9
49 if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
50 runes[p.Rune(0)] = viramaModifier
51 }
52 switch {
53 case unicode.In(r, unicode.Mark):
54 runes[r] |= modifier | mayNeedNorm
55 }
56
57
58
59 switch p, _ := bidi.LookupRune(r); p.Class() {
60 case bidi.R, bidi.AL, bidi.AN:
61 if x := runes[r]; x != 0 && x != mayNeedNorm {
62 log.Fatalf("%U: rune both modifier and RTL letter/number", r)
63 }
64 runes[r] = rtl
65 }
66 })
67
68 ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
69 switch v := p.String(1); v {
70 case "L", "D", "T", "R":
71 runes[p.Rune(0)] |= joinType[v] << joinShift
72 }
73 })
74
75 ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
76 r := p.Rune(0)
77
78
79 if !utf8.ValidRune(r) {
80 return
81 }
82
83 cat := catFromEntry(p)
84 isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
85 if !isMapped {
86
87
88
89
90
91
92 cat |= category(runes[r])
93 }
94
95 s := string(p.Runes(2))
96 if s != "" && !isMapped {
97 log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
98 }
99 t.Insert(r, uint64(makeEntry(r, s))+uint64(cat))
100 })
101
102 w := gen.NewCodeWriter()
103 defer w.WriteVersionedGoFile("tables.go", "idna")
104
105 gen.WriteUnicodeVersion(w)
106
107 w.WriteVar("mappings", string(mappings))
108 w.WriteVar("mappingIndex", mappingIndex)
109 w.WriteVar("xorData", string(xorData))
110
111 sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
112 if err != nil {
113 log.Fatal(err)
114 }
115 w.Size += sz
116 }
117
118 var (
119
120 mappings = []byte{}
121
122
123
124
125 mappingIndex = []uint16{0}
126 mapCache = map[string]int{}
127
128
129
130 xorData = []byte{}
131 xorCache = map[string]int{}
132 )
133
134
135 func makeEntry(r rune, mapped string) info {
136 orig := string(r)
137
138 if len(orig) != len(mapped) {
139
140 index := len(mappingIndex) - 1
141 if x, ok := mapCache[mapped]; ok {
142 index = x
143 } else {
144 mapCache[mapped] = index
145 mappings = append(mappings, mapped...)
146 mappingIndex = append(mappingIndex, uint16(len(mappings)))
147 }
148 return info(index) << indexShift
149 }
150
151
152 var b []byte
153 for i := 0; i < len(orig); i++ {
154 b = append(b, orig[i]^mapped[i])
155 }
156
157
158 for ; len(b) > 1 && b[0] == 0; b = b[1:] {
159 }
160
161 if len(b) == 1 {
162 return xorBit | inlineXOR | info(b[0])<<indexShift
163 }
164 mapped = string(b)
165
166
167 index := len(xorData)
168 if x, ok := xorCache[mapped]; ok {
169 index = x
170 } else {
171 xorCache[mapped] = index
172 xorData = append(xorData, byte(len(mapped)))
173 xorData = append(xorData, mapped...)
174 }
175 return xorBit | info(index)<<indexShift
176 }
177
178
179
180
181
182
183
184
185 const maxSparseEntries = 16
186
187 type normCompacter struct {
188 sparseBlocks [][]uint64
189 sparseOffset []uint16
190 sparseCount int
191 }
192
193 func mostFrequentStride(a []uint64) int {
194 counts := make(map[int]int)
195 var v int
196 for _, x := range a {
197 if stride := int(x) - v; v != 0 && stride >= 0 {
198 counts[stride]++
199 }
200 v = int(x)
201 }
202 var maxs, maxc int
203 for stride, cnt := range counts {
204 if cnt > maxc || (cnt == maxc && stride < maxs) {
205 maxs, maxc = stride, cnt
206 }
207 }
208 return maxs
209 }
210
211 func countSparseEntries(a []uint64) int {
212 stride := mostFrequentStride(a)
213 var v, count int
214 for _, tv := range a {
215 if int(tv)-v != stride {
216 if tv != 0 {
217 count++
218 }
219 }
220 v = int(tv)
221 }
222 return count
223 }
224
225 func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
226 if n := countSparseEntries(v); n <= maxSparseEntries {
227 return (n+1)*4 + 2, true
228 }
229 return 0, false
230 }
231
232 func (c *normCompacter) Store(v []uint64) uint32 {
233 h := uint32(len(c.sparseOffset))
234 c.sparseBlocks = append(c.sparseBlocks, v)
235 c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
236 c.sparseCount += countSparseEntries(v) + 1
237 return h
238 }
239
240 func (c *normCompacter) Handler() string {
241 return "idnaSparse.lookup"
242 }
243
244 func (c *normCompacter) Print(w io.Writer) (retErr error) {
245 p := func(f string, x ...interface{}) {
246 if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
247 retErr = err
248 }
249 }
250
251 ls := len(c.sparseBlocks)
252 p("// idnaSparseOffset: %d entries, %d bytes\n", ls, ls*2)
253 p("var idnaSparseOffset = %#v\n\n", c.sparseOffset)
254
255 ns := c.sparseCount
256 p("// idnaSparseValues: %d entries, %d bytes\n", ns, ns*4)
257 p("var idnaSparseValues = [%d]valueRange {", ns)
258 for i, b := range c.sparseBlocks {
259 p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
260 var v int
261 stride := mostFrequentStride(b)
262 n := countSparseEntries(b)
263 p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
264 for i, nv := range b {
265 if int(nv)-v != stride {
266 if v != 0 {
267 p(",hi:%#02x},", 0x80+i-1)
268 }
269 if nv != 0 {
270 p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
271 }
272 }
273 v = int(nv)
274 }
275 if v != 0 {
276 p(",hi:%#02x},", 0x80+len(b)-1)
277 }
278 }
279 p("\n}\n\n")
280 return
281 }
282
View as plain text