1
2
3
4
5
6
7
8
9 package main
10
11 import (
12 "bytes"
13 "flag"
14 "fmt"
15 "log"
16 "reflect"
17 "sort"
18 "strings"
19
20 "golang.org/x/text/internal/gen"
21 "golang.org/x/text/language"
22 "golang.org/x/text/unicode/cldr"
23 )
24
25 var (
26 test = flag.Bool("test", false,
27 "test existing tables; can be used to compare web data with package data.")
28 outputFile = flag.String("output", "tables.go", "output file")
29
30 stats = flag.Bool("stats", false, "prints statistics to stderr")
31
32 short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
33 draft = flag.String("draft",
34 "contributed",
35 `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
36 pkg = flag.String("package",
37 "display",
38 "the name of the package in which the generated file is to be included")
39
40 tags = newTagSet("tags",
41 []language.Tag{},
42 "space-separated list of tags to include or empty for all")
43 dict = newTagSet("dict",
44 dictTags(),
45 "space-separated list or tags for which to include a Dictionary. "+
46 `"" means the common list from go.text/language.`)
47 )
48
49 func dictTags() (tag []language.Tag) {
50
51 const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
52 "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
53 "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
54 "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
55 "zh zh-Hans zh-Hant zu"
56
57 for _, s := range strings.Split(str, " ") {
58 tag = append(tag, language.MustParse(s))
59 }
60 return tag
61 }
62
63 func main() {
64 gen.Init()
65
66
67 r := gen.OpenCLDRCoreZip()
68 defer r.Close()
69
70 d := &cldr.Decoder{}
71 d.SetDirFilter("main", "supplemental")
72 d.SetSectionFilter("localeDisplayNames")
73 data, err := d.DecodeZip(r)
74 if err != nil {
75 log.Fatalf("DecodeZip: %v", err)
76 }
77
78 w := gen.NewCodeWriter()
79 defer w.WriteGoFile(*outputFile, "display")
80
81 gen.WriteCLDRVersion(w)
82
83 b := builder{
84 w: w,
85 data: data,
86 group: make(map[string]*group),
87 }
88 b.generate()
89 }
90
91 const tagForm = language.All
92
93
94
95 type tagSet map[language.Tag]bool
96
97 func newTagSet(name string, tags []language.Tag, usage string) tagSet {
98 f := tagSet(make(map[language.Tag]bool))
99 for _, t := range tags {
100 f[t] = true
101 }
102 flag.Var(f, name, usage)
103 return f
104 }
105
106
107 func (f tagSet) String() string {
108 tags := []string{}
109 for t := range f {
110 tags = append(tags, t.String())
111 }
112 sort.Strings(tags)
113 return strings.Join(tags, " ")
114 }
115
116
117 func (f tagSet) Set(s string) error {
118 if s != "" {
119 for _, s := range strings.Split(s, " ") {
120 if s != "" {
121 tag, err := tagForm.Parse(s)
122 if err != nil {
123 return err
124 }
125 f[tag] = true
126 }
127 }
128 }
129 return nil
130 }
131
132 func (f tagSet) contains(t language.Tag) bool {
133 if len(f) == 0 {
134 return true
135 }
136 return f[t]
137 }
138
139
140 type builder struct {
141 w *gen.CodeWriter
142
143 data *cldr.CLDR
144
145 fromLocs []string
146
147
148 toTags []string
149 toTagIndex map[string]int
150
151
152 supported []language.Tag
153
154
155 group map[string]*group
156
157
158 sizeIndex int
159 sizeData int
160 totalSize int
161 }
162
163 type group struct {
164
165 lang map[language.Tag]keyValues
166 headers []header
167
168 toTags []string
169 threeStart int
170 fourPlusStart int
171 }
172
173
174 func (g *group) set(t language.Tag, typ, name string) {
175 kv := g.lang[t]
176 if kv == nil {
177 kv = make(keyValues)
178 g.lang[t] = kv
179 }
180 if kv[typ] == "" {
181 kv[typ] = name
182 }
183 }
184
185 type keyValues map[string]string
186
187 type header struct {
188 tag language.Tag
189 data string
190 index []uint16
191 }
192
193 var versionInfo = `// Version is deprecated. Use CLDRVersion.
194 const Version = %#v
195
196 `
197
198 var self = language.MustParse("mul")
199
200
201 func (b *builder) generate() {
202 fmt.Fprintf(b.w, versionInfo, cldr.Version)
203
204 b.filter()
205 b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
206 if ldn.Languages != nil {
207 for _, v := range ldn.Languages.Language {
208 lang := v.Type
209 if lang == "root" {
210
211
212 continue
213 }
214 tag := tagForm.MustParse(lang)
215 if tags.contains(tag) {
216 g.set(loc, tag.String(), v.Data())
217 }
218 }
219 }
220 })
221 b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
222 if ldn.Scripts != nil {
223 for _, v := range ldn.Scripts.Script {
224 code := language.MustParseScript(v.Type)
225 if code.IsPrivateUse() {
226
227
228 if loc == language.English {
229 log.Fatal("Consider including data for private use scripts.")
230 }
231 continue
232 }
233 g.set(loc, code.String(), v.Data())
234 }
235 }
236 })
237 b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
238 if ldn.Territories != nil {
239 for _, v := range ldn.Territories.Territory {
240 g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
241 }
242 }
243 })
244
245 b.makeSupported()
246
247 b.writeParents()
248
249 b.writeGroup("lang")
250 b.writeGroup("script")
251 b.writeGroup("region")
252
253 b.w.WriteConst("numSupported", len(b.supported))
254 buf := bytes.Buffer{}
255 for _, tag := range b.supported {
256 fmt.Fprint(&buf, tag.String(), "|")
257 }
258 b.w.WriteConst("supported", buf.String())
259
260 b.writeDictionaries()
261
262 b.supported = []language.Tag{self}
263
264
265
266
267
268 for i := 0; i < 4; i++ {
269 b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
270 parent := tag
271 if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
272 parent, _ = language.Raw.Compose(b)
273 }
274 if ldn.Languages != nil {
275 for _, v := range ldn.Languages.Language {
276 key := tagForm.MustParse(v.Type)
277 saved := key
278 if key == parent {
279 g.set(self, tag.String(), v.Data())
280 }
281 for k := 0; k < i; k++ {
282 key = key.Parent()
283 }
284 if key == tag {
285 g.set(self, saved.String(), v.Data())
286 }
287 }
288 }
289 })
290 }
291
292 b.writeGroup("self")
293 }
294
295 func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
296 b.sizeIndex = 0
297 b.sizeData = 0
298 b.toTags = nil
299 b.fromLocs = nil
300 b.toTagIndex = make(map[string]int)
301
302 g := b.group[name]
303 if g == nil {
304 g = &group{lang: make(map[language.Tag]keyValues)}
305 b.group[name] = g
306 }
307 for _, loc := range b.data.Locales() {
308
309
310 ldml := b.data.RawLDML(loc)
311
312
313
314
315 tag, err := tagForm.Parse(loc)
316 if err != nil {
317 if ldml.LocaleDisplayNames != nil {
318 log.Fatalf("setData: %v", err)
319 }
320 continue
321 }
322 if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
323 f(g, tag, ldml.LocaleDisplayNames)
324 }
325 }
326 }
327
328 func (b *builder) filter() {
329 filter := func(s *cldr.Slice) {
330 if *short {
331 s.SelectOnePerGroup("alt", []string{"short", ""})
332 } else {
333 s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
334 }
335 d, err := cldr.ParseDraft(*draft)
336 if err != nil {
337 log.Fatalf("filter: %v", err)
338 }
339 s.SelectDraft(d)
340 }
341 for _, loc := range b.data.Locales() {
342 if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
343 if ldn.Languages != nil {
344 s := cldr.MakeSlice(&ldn.Languages.Language)
345 if filter(&s); len(ldn.Languages.Language) == 0 {
346 ldn.Languages = nil
347 }
348 }
349 if ldn.Scripts != nil {
350 s := cldr.MakeSlice(&ldn.Scripts.Script)
351 if filter(&s); len(ldn.Scripts.Script) == 0 {
352 ldn.Scripts = nil
353 }
354 }
355 if ldn.Territories != nil {
356 s := cldr.MakeSlice(&ldn.Territories.Territory)
357 if filter(&s); len(ldn.Territories.Territory) == 0 {
358 ldn.Territories = nil
359 }
360 }
361 }
362 }
363 }
364
365
366 func (b *builder) makeSupported() {
367
368 for _, g := range b.group {
369 for t, _ := range g.lang {
370 b.supported = append(b.supported, t)
371 }
372 }
373 b.supported = b.supported[:unique(tagsSorter(b.supported))]
374
375 }
376
377 type tagsSorter []language.Tag
378
379 func (a tagsSorter) Len() int { return len(a) }
380 func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
381 func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
382
383 func (b *builder) writeGroup(name string) {
384 g := b.group[name]
385
386 for _, kv := range g.lang {
387 for t, _ := range kv {
388 g.toTags = append(g.toTags, t)
389 }
390 }
391 g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
392
393
394 g.headers = make([]header, len(b.supported))
395 for i, sup := range b.supported {
396 kv, ok := g.lang[sup]
397 if !ok {
398 g.headers[i].tag = sup
399 continue
400 }
401 data := []byte{}
402 index := make([]uint16, len(g.toTags), len(g.toTags)+1)
403 for j, t := range g.toTags {
404 index[j] = uint16(len(data))
405 data = append(data, kv[t]...)
406 }
407 index = append(index, uint16(len(data)))
408
409
410
411 n := len(index)
412 for ; n >= 2 && index[n-2] == index[n-1]; n-- {
413 }
414 index = index[:n]
415
416
417
418 if cldr.Version == "26" && sup.String() == "hsb" {
419 data = bytes.Replace(data, []byte{'"'}, nil, 1)
420 }
421 g.headers[i] = header{sup, string(data), index}
422 }
423 g.writeTable(b.w, name)
424 }
425
// tagsBySize implements sort.Interface for a slice of tag strings. Strings of
// at most four bytes sort by length first and sort before all longer strings;
// strings of equal length, and any two strings longer than four bytes, sort
// lexicographically.
type tagsBySize []string

// Len returns the number of strings.
func (l tagsBySize) Len() int {
	return len(l)
}

// Swap exchanges the strings at positions i and j.
func (l tagsBySize) Swap(i, j int) {
	l[j], l[i] = l[i], l[j]
}

// Less reports whether string i sorts before string j.
func (l tagsBySize) Less(i, j int) bool {
	x, y := l[i], l[j]
	// Length decides only when the lengths differ and at least one of the
	// two strings is short (four bytes or fewer).
	anyShort := len(x) <= 4 || len(y) <= 4
	if anyShort && len(x) != len(y) {
		return len(x) < len(y)
	}
	return x < y
}
438
439
440
441 func parentIndices(tags []language.Tag) []int16 {
442 index := make(map[language.Tag]int16)
443 for i, t := range tags {
444 index[t] = int16(i)
445 }
446
447
448 parents := make([]int16, len(tags))
449 for i, t := range tags {
450 parents[i] = -1
451 for t = t.Parent(); t != language.Und; t = t.Parent() {
452 if j, ok := index[t]; ok {
453 parents[i] = j
454 break
455 }
456 }
457 }
458 return parents
459 }
460
461 func (b *builder) writeParents() {
462 parents := parentIndices(b.supported)
463 fmt.Fprintf(b.w, "var parents = ")
464 b.w.WriteArray(parents)
465 }
466
467
468
469 func writeKeys(w *gen.CodeWriter, name string, keys []string) {
470 w.Size += int(3 * reflect.TypeOf("").Size())
471 w.WriteComment("Number of keys: %d", len(keys))
472 fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
473 for i := 2; i <= 4; i++ {
474 sub := []string{}
475 for _, t := range keys {
476 if len(t) != i {
477 break
478 }
479 sub = append(sub, t)
480 }
481 s := strings.Join(sub, "")
482 w.WriteString(s)
483 fmt.Fprintf(w, ",\n")
484 keys = keys[len(sub):]
485 }
486 fmt.Fprintln(w, "\t}")
487 if len(keys) > 0 {
488 w.Size += int(reflect.TypeOf([]string{}).Size())
489 fmt.Fprintf(w, "\t%sTagsLong = ", name)
490 w.WriteSlice(keys)
491 }
492 fmt.Fprintln(w, ")\n")
493 }
494
495
496 func identifier(t language.Tag) string {
497 return strings.Replace(t.String(), "-", "", -1)
498 }
499
500 func (h *header) writeEntry(w *gen.CodeWriter, name string) {
501 if len(dict) > 0 && dict.contains(h.tag) {
502 fmt.Fprintf(w, "\t{ // %s\n", h.tag)
503 fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
504 fmt.Fprintln(w, "\t},")
505 } else if len(h.data) == 0 {
506 fmt.Fprintln(w, "\t\t{}, //", h.tag)
507 } else {
508 fmt.Fprintf(w, "\t{ // %s\n", h.tag)
509 w.WriteString(h.data)
510 fmt.Fprintln(w, ",")
511 w.WriteSlice(h.index)
512 fmt.Fprintln(w, ",\n\t},")
513 }
514 }
515
516
517
518 func (h *header) writeSingle(w *gen.CodeWriter, name string) {
519 if len(dict) > 0 && dict.contains(h.tag) {
520 tag := identifier(h.tag)
521 w.WriteConst(tag+name+"Str", h.data)
522
523
524
525
526 w.WriteVar(tag+name+"Idx", h.index)
527 }
528 }
529
530
531 func (g *group) writeTable(w *gen.CodeWriter, name string) {
532 start := w.Size
533 writeKeys(w, name, g.toTags)
534 w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
535
536 fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
537
538 title := strings.Title(name)
539 for _, h := range g.headers {
540 h.writeEntry(w, title)
541 }
542 fmt.Fprintln(w, "}\n")
543
544 for _, h := range g.headers {
545 h.writeSingle(w, title)
546 }
547 n := w.Size - start
548 fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
549 }
550
551 func (b *builder) writeDictionaries() {
552 fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
553 fmt.Fprintln(b.w, "var (")
554 parents := parentIndices(b.supported)
555
556 for i, t := range b.supported {
557 if dict.contains(t) {
558 ident := identifier(t)
559 fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
560 if p := parents[i]; p == -1 {
561 fmt.Fprintln(b.w, "\t\tnil,")
562 } else {
563 fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
564 }
565 fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
566 fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
567 fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
568 fmt.Fprintln(b.w, "\t}")
569 }
570 }
571 fmt.Fprintln(b.w, ")")
572
573 var s string
574 var a []uint16
575 sz := reflect.TypeOf(s).Size()
576 sz += reflect.TypeOf(a).Size()
577 sz *= 3
578 sz += reflect.TypeOf(&a).Size()
579 n := int(sz) * len(dict)
580 fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
581
582 b.w.Size += n
583 }
584
585
586
// unique sorts a and moves one representative of every group of equal
// elements to the front, preserving sorted order. It returns the number of
// distinct elements n; afterwards the first n elements of a are the distinct
// values and the remainder is in unspecified order. Two elements are
// considered equal when neither is Less than the other.
func unique(a sort.Interface) int {
	n := a.Len()
	if n == 0 {
		return 0
	}
	sort.Sort(a)

	last := 0 // position of the most recently kept element
	for i := 1; i < n; i++ {
		if !a.Less(last, i) {
			// Element i equals the last kept one: skip the duplicate.
			continue
		}
		last++
		if last != i {
			a.Swap(last, i)
		}
	}
	return last + 1
}
603
View as plain text