1
2
3
4
5
6
7
8
9
10 package main
11
12 import (
13 "flag"
14 "log"
15 "unicode"
16 "unicode/utf8"
17
18 "golang.org/x/text/internal/gen"
19 "golang.org/x/text/internal/triegen"
20 "golang.org/x/text/internal/ucd"
21 "golang.org/x/text/unicode/norm"
22 "golang.org/x/text/unicode/rangetable"
23 )
24
25 var outputFile = flag.String("output", "tables.go", "output file for generated tables; default tables.go")
26
27 var assigned, disallowedRunes *unicode.RangeTable
28
29 var runeCategory = map[rune]category{}
30
31 var overrides = map[category]category{
32 viramaModifier: viramaJoinT,
33 greek: greekJoinT,
34 hebrew: hebrewJoinT,
35 }
36
37 func setCategory(r rune, cat category) {
38 if c, ok := runeCategory[r]; ok {
39 if override, ok := overrides[c]; cat == joiningT && ok {
40 cat = override
41 } else {
42 log.Fatalf("%U: multiple categories for rune (%v and %v)", r, c, cat)
43 }
44 }
45 runeCategory[r] = cat
46 }
47
48 func init() {
49 if numCategories > 1<<propShift {
50 log.Fatalf("Number of categories is %d; may at most be %d", numCategories, 1<<propShift)
51 }
52 }
53
54 func main() {
55 gen.Init()
56
57
58 runes := []rune{}
59
60 ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
61 if p.String(1) == "Default_Ignorable_Code_Point" {
62 runes = append(runes, p.Rune(0))
63 }
64 })
65 ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) {
66 switch p.String(1) {
67 case "Noncharacter_Code_Point":
68 runes = append(runes, p.Rune(0))
69 }
70 })
71
72 ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
73 switch p.String(1) {
74 case "L", "V", "T":
75 runes = append(runes, p.Rune(0))
76 }
77 })
78
79 disallowedRunes = rangetable.New(runes...)
80 assigned = rangetable.Assigned(unicode.Version)
81
82
83 runeCategory['l'] = latinSmallL
84 ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
85 const cccVirama = 9
86 if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
87 setCategory(p.Rune(0), viramaModifier)
88 }
89 })
90 ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) {
91 switch p.String(1) {
92 case "Greek":
93 setCategory(p.Rune(0), greek)
94 case "Hebrew":
95 setCategory(p.Rune(0), hebrew)
96 case "Hiragana", "Katakana", "Han":
97 setCategory(p.Rune(0), japanese)
98 }
99 })
100
101
102
103
104 for r, e := range exceptions {
105 if e.cat != 0 {
106 runeCategory[r] = e.cat
107 }
108 }
109 cat := map[string]category{
110 "L": joiningL,
111 "D": joiningD,
112 "T": joiningT,
113
114 "R": joiningR,
115 }
116 ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
117 switch v := p.String(1); v {
118 case "L", "D", "T", "R":
119 setCategory(p.Rune(0), cat[v])
120 }
121 })
122
123 writeTables()
124 gen.Repackage("gen_trieval.go", "trieval.go", "precis")
125 }
126
127 type exception struct {
128 prop property
129 cat category
130 }
131
132 func init() {
133
134
135 for i := rune(0); i <= 9; i++ {
136 exceptions[0x0660+i] = exception{
137 prop: disallowed,
138 cat: arabicIndicDigit,
139 }
140 exceptions[0x06F0+i] = exception{
141 prop: disallowed,
142 cat: extendedArabicIndicDigit,
143 }
144 }
145 }
146
147
148
149 var exceptions = map[rune]exception{
150 0x00DF: {prop: pValid},
151 0x03C2: {prop: pValid},
152 0x06FD: {prop: pValid},
153 0x06FE: {prop: pValid},
154 0x0F0B: {prop: pValid},
155 0x3007: {prop: pValid},
156
157
158
159
160
161
162
163
164
165 0x200C: {prop: disallowed, cat: zeroWidthNonJoiner},
166 0x200D: {prop: disallowed, cat: zeroWidthJoiner},
167
168
169 0x00B7: {prop: disallowed, cat: middleDot},
170 0x0375: {prop: disallowed, cat: greekLowerNumeralSign},
171 0x05F3: {prop: disallowed, cat: hebrewPreceding},
172 0x05F4: {prop: disallowed, cat: hebrewPreceding},
173 0x30FB: {prop: pValid, cat: katakanaMiddleDot},
174
175
176
177 0x0660: {prop: pValid},
178 0x0661: {prop: pValid},
179 0x0662: {prop: pValid},
180 0x0663: {prop: pValid},
181 0x0664: {prop: pValid},
182 0x0665: {prop: pValid},
183 0x0666: {prop: pValid},
184 0x0667: {prop: pValid},
185 0x0668: {prop: pValid},
186 0x0669: {prop: pValid},
187 0x06F0: {prop: pValid},
188 0x06F1: {prop: pValid},
189 0x06F2: {prop: pValid},
190 0x06F3: {prop: pValid},
191 0x06F4: {prop: pValid},
192 0x06F5: {prop: pValid},
193 0x06F6: {prop: pValid},
194 0x06F7: {prop: pValid},
195 0x06F8: {prop: pValid},
196 0x06F9: {prop: pValid},
197
198 0x0640: {prop: disallowed},
199 0x07FA: {prop: disallowed},
200 0x302E: {prop: disallowed},
201 0x302F: {prop: disallowed},
202 0x3031: {prop: disallowed},
203 0x3032: {prop: disallowed},
204 0x3033: {prop: disallowed},
205 0x3034: {prop: disallowed},
206 0x3035: {prop: disallowed},
207 0x303B: {prop: disallowed},
208 }
209
210
211
// isLetterDigits reports whether r belongs to one of the general categories
// Ll, Lu, Lm, Lo, Mn, Mc or Nd.
func isLetterDigits(r rune) bool {
	switch {
	case unicode.Is(unicode.Ll, r),
		unicode.Is(unicode.Lu, r),
		unicode.Is(unicode.Lm, r),
		unicode.Is(unicode.Lo, r),
		unicode.Is(unicode.Mn, r),
		unicode.Is(unicode.Mc, r),
		unicode.Is(unicode.Nd, r):
		return true
	}
	return false
}
219
// isIdDisAndFreePVal reports whether r belongs to one of the general
// categories listed below. writeTables assigns such runes the
// idDisOrFreePVal property.
func isIdDisAndFreePVal(r rune) bool {
	tables := [...]*unicode.RangeTable{
		// Letters and numbers outside the set accepted by isLetterDigits,
		// plus enclosing marks.
		unicode.Lt, unicode.Nl, unicode.No,
		unicode.Me,
		// Space separators.
		unicode.Zs,
		// Symbols.
		unicode.Sm, unicode.Sc, unicode.Sk, unicode.So,
		// Punctuation.
		unicode.Pc, unicode.Pd, unicode.Ps, unicode.Pe,
		unicode.Pi, unicode.Pf, unicode.Po,
	}
	for _, tab := range tables {
		if unicode.Is(tab, r) {
			return true
		}
	}
	return false
}
241
242
243 func hasCompat(r rune) bool {
244 return !norm.NFKC.IsNormalString(string(r))
245 }
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265 func writeTables() {
266 propTrie := triegen.NewTrie("derivedProperties")
267 w := gen.NewCodeWriter()
268 defer w.WriteVersionedGoFile(*outputFile, "precis")
269 gen.WriteUnicodeVersion(w)
270
271
272 for i := rune(0); i < unicode.MaxRune; i++ {
273 r := rune(i)
274
275 if !utf8.ValidRune(r) {
276 continue
277 }
278
279 e, ok := exceptions[i]
280 p := e.prop
281 switch {
282 case ok:
283 case !unicode.In(r, assigned):
284 p = unassigned
285 case r >= 0x0021 && r <= 0x007e:
286 p = pValid
287 case unicode.In(r, disallowedRunes, unicode.Cc):
288 p = disallowed
289 case hasCompat(r):
290 p = idDisOrFreePVal
291 case isLetterDigits(r):
292 p = pValid
293 case isIdDisAndFreePVal(r):
294 p = idDisOrFreePVal
295 default:
296 p = disallowed
297 }
298 cat := runeCategory[r]
299
300 if p == disallowed {
301 cat = exceptions[r].cat
302 }
303 propTrie.Insert(r, uint64(p)|uint64(cat))
304 }
305 sz, err := propTrie.Gen(w)
306 if err != nil {
307 log.Fatal(err)
308 }
309 w.Size += sz
310 }
311
View as plain text