1
2
3
4
5
6
7
8
9
10 package main
11
12 import (
13 "flag"
14 "fmt"
15 "io"
16 "log"
17 "sort"
18 "strconv"
19 "strings"
20
21 "golang.org/x/text/internal/gen"
22 "golang.org/x/text/internal/language"
23 "golang.org/x/text/unicode/cldr"
24 )
25
// Command-line flags of the generator.
var (
	// test holds the value of the -test flag (default false).
	test = flag.Bool(
		"test", false,
		"test existing tables; can be used to compare web data with package data.")

	// outputFile holds the value of the -output flag (default "tables.go").
	outputFile = flag.String(
		"output", "tables.go",
		"output file for generated tables")
)
34
35 func main() {
36 gen.Init()
37
38 w := gen.NewCodeWriter()
39 defer w.WriteGoFile("tables.go", "language")
40
41 b := newBuilder(w)
42 gen.WriteCLDRVersion(w)
43
44 b.writeConstants()
45 b.writeMatchData()
46 }
47
48 type builder struct {
49 w *gen.CodeWriter
50 hw io.Writer
51 data *cldr.CLDR
52 supp *cldr.SupplementalData
53 }
54
55 func (b *builder) langIndex(s string) uint16 {
56 return uint16(language.MustParseBase(s))
57 }
58
59 func (b *builder) regionIndex(s string) int {
60 return int(language.MustParseRegion(s))
61 }
62
63 func (b *builder) scriptIndex(s string) int {
64 return int(language.MustParseScript(s))
65 }
66
67 func newBuilder(w *gen.CodeWriter) *builder {
68 r := gen.OpenCLDRCoreZip()
69 defer r.Close()
70 d := &cldr.Decoder{}
71 data, err := d.DecodeZip(r)
72 if err != nil {
73 log.Fatal(err)
74 }
75 b := builder{
76 w: w,
77 hw: io.MultiWriter(w, w.Hash),
78 data: data,
79 supp: data.Supplemental(),
80 }
81 return &b
82 }
83
84
85
86 func (b *builder) writeConsts(f func(string) int, values ...string) {
87 fmt.Fprintln(b.w, "const (")
88 for _, v := range values {
89 fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
90 }
91 fmt.Fprintln(b.w, ")")
92 }
93
94
95
// Identifiers for which writeConstants emits a named constant ("_" followed
// by the identifier) holding the identifier's index.
var (
	// langConsts lists the base languages.
	langConsts = []string{
		"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
	}

	// scriptConsts lists the scripts.
	scriptConsts = []string{
		"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
		"Zzzz",
	}

	// regionConsts lists the regions.
	regionConsts = []string{
		"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
		"ZZ", "XA", "XC", "XK",
	}
)
109
110 func (b *builder) writeConstants() {
111 b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
112 b.writeConsts(b.regionIndex, regionConsts...)
113 b.writeConsts(b.scriptIndex, scriptConsts...)
114 }
115
// mutualIntelligibility is one entry of the matchLang table built by
// writeMatchData. The type itself is also emitted into the generated file,
// so the order and types of its fields are significant.
type mutualIntelligibility struct {
	// want is the index of the desired base language.
	want uint16
	// have is the index of the supported base language.
	have uint16
	// distance is the distance value of the matching rule.
	distance uint8
	// oneway reports whether the rule was marked as one-way.
	oneway bool
}
121
// scriptIntelligibility is one entry of the matchScript table built by
// writeMatchData. The type itself is also emitted into the generated file,
// so the order and types of its fields are significant.
type scriptIntelligibility struct {
	// wantLang is the index of the desired base language.
	wantLang uint16
	// haveLang is the index of the supported base language.
	haveLang uint16
	// wantScript is the index of the desired script.
	wantScript uint8
	// haveScript is the index of the supported script.
	haveScript uint8
	// distance is the distance value of the matching rule.
	distance uint8
}
128
// regionIntelligibility is one entry of the matchRegion table built by
// writeMatchData. The type itself is also emitted into the generated file,
// so the order and types of its fields are significant.
type regionIntelligibility struct {
	// lang is the index of the rule's base language.
	lang uint16

	// script is the index of the rule's script; it stays 0 when the rule
	// uses "*" for the script.
	script uint8

	// group describes the rule's region part: bit 0x80 is set for "*" and
	// for "$!name" variables, and the remaining bits hold the 1-based index
	// of the match variable for "$name" and "$!name".
	group uint8

	// distance is the distance value of the matching rule.
	distance uint8
}
136
137
138
139
140
141
142
143 func (b *builder) writeMatchData() {
144 lm := b.supp.LanguageMatching.LanguageMatches
145 cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
146
147 regionHierarchy := map[string][]string{}
148 for _, g := range b.supp.TerritoryContainment.Group {
149 regions := strings.Split(g.Contains, " ")
150 regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
151 }
152
153
154 regionToGroups := make([]uint8, language.NumRegions+1)
155
156 idToIndex := map[string]uint8{}
157 for i, mv := range lm[0].MatchVariable {
158 if i > 6 {
159 log.Fatalf("Too many groups: %d", i)
160 }
161 idToIndex[mv.Id] = uint8(i + 1)
162
163 for _, r := range strings.Split(mv.Value, "+") {
164 todo := []string{r}
165 for k := 0; k < len(todo); k++ {
166 r := todo[k]
167 regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
168 todo = append(todo, regionHierarchy[r]...)
169 }
170 }
171 }
172 b.w.WriteVar("regionToGroups", regionToGroups)
173
174
175 paradigmLocales := [][3]uint16{}
176 locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
177 for i := 0; i < len(locales); i += 2 {
178 x := [3]uint16{}
179 for j := 0; j < 2; j++ {
180 pc := strings.SplitN(locales[i+j], "-", 2)
181 x[0] = b.langIndex(pc[0])
182 if len(pc) == 2 {
183 x[1+j] = uint16(b.regionIndex(pc[1]))
184 }
185 }
186 paradigmLocales = append(paradigmLocales, x)
187 }
188 b.w.WriteVar("paradigmLocales", paradigmLocales)
189
190 b.w.WriteType(mutualIntelligibility{})
191 b.w.WriteType(scriptIntelligibility{})
192 b.w.WriteType(regionIntelligibility{})
193
194 matchLang := []mutualIntelligibility{}
195 matchScript := []scriptIntelligibility{}
196 matchRegion := []regionIntelligibility{}
197
198 for _, m := range lm[0].LanguageMatch {
199
200 desired := strings.Replace(m.Desired, "-", "_", -1)
201 supported := strings.Replace(m.Supported, "-", "_", -1)
202 d := strings.Split(desired, "_")
203 s := strings.Split(supported, "_")
204 if len(d) != len(s) {
205 log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
206 continue
207 }
208 distance, _ := strconv.ParseInt(m.Distance, 10, 8)
209 switch len(d) {
210 case 2:
211 if desired == supported && desired == "*_*" {
212 continue
213 }
214
215 matchScript = append(matchScript, scriptIntelligibility{
216 wantLang: uint16(b.langIndex(d[0])),
217 haveLang: uint16(b.langIndex(s[0])),
218 wantScript: uint8(b.scriptIndex(d[1])),
219 haveScript: uint8(b.scriptIndex(s[1])),
220 distance: uint8(distance),
221 })
222 if m.Oneway != "true" {
223 matchScript = append(matchScript, scriptIntelligibility{
224 wantLang: uint16(b.langIndex(s[0])),
225 haveLang: uint16(b.langIndex(d[0])),
226 wantScript: uint8(b.scriptIndex(s[1])),
227 haveScript: uint8(b.scriptIndex(d[1])),
228 distance: uint8(distance),
229 })
230 }
231 case 1:
232 if desired == supported && desired == "*" {
233 continue
234 }
235 if distance == 1 {
236
237
238 if d[0] != "no" || s[0] != "nb" {
239 log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
240 }
241 continue
242 }
243
244 matchLang = append(matchLang, mutualIntelligibility{
245 want: uint16(b.langIndex(d[0])),
246 have: uint16(b.langIndex(s[0])),
247 distance: uint8(distance),
248 oneway: m.Oneway == "true",
249 })
250 case 3:
251 if desired == supported && desired == "*_*_*" {
252 continue
253 }
254 if desired != supported {
255
256
257
258
259 if supported != "en_*_GB" {
260 log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
261 }
262 continue
263 }
264 ri := regionIntelligibility{
265 lang: b.langIndex(d[0]),
266 distance: uint8(distance),
267 }
268 if d[1] != "*" {
269 ri.script = uint8(b.scriptIndex(d[1]))
270 }
271 switch {
272 case d[2] == "*":
273 ri.group = 0x80
274 case strings.HasPrefix(d[2], "$!"):
275 ri.group = 0x80
276 d[2] = "$" + d[2][len("$!"):]
277 fallthrough
278 case strings.HasPrefix(d[2], "$"):
279 ri.group |= idToIndex[d[2]]
280 }
281 matchRegion = append(matchRegion, ri)
282 default:
283 log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
284 }
285 }
286 sort.SliceStable(matchLang, func(i, j int) bool {
287 return matchLang[i].distance < matchLang[j].distance
288 })
289 b.w.WriteComment(`
290 matchLang holds pairs of langIDs of base languages that are typically
291 mutually intelligible. Each pair is associated with a confidence and
292 whether the intelligibility goes one or both ways.`)
293 b.w.WriteVar("matchLang", matchLang)
294
295 b.w.WriteComment(`
296 matchScript holds pairs of scriptIDs where readers of one script
297 can typically also read the other. Each is associated with a confidence.`)
298 sort.SliceStable(matchScript, func(i, j int) bool {
299 return matchScript[i].distance < matchScript[j].distance
300 })
301 b.w.WriteVar("matchScript", matchScript)
302
303 sort.SliceStable(matchRegion, func(i, j int) bool {
304 return matchRegion[i].distance < matchRegion[j].distance
305 })
306 b.w.WriteVar("matchRegion", matchRegion)
307 }
308
View as plain text