1
2
3
4
5
6
7
8 package main
9
10 import (
11 "archive/zip"
12 "bytes"
13 "encoding/xml"
14 "flag"
15 "fmt"
16 "io"
17 "log"
18 "os"
19 "regexp"
20 "strings"
21
22 "golang.org/x/text/internal/gen"
23 )
24
25 var outputFile = flag.String("output", "xml.go", "output file name")
26
27 func main() {
28 flag.Parse()
29
30 r := gen.OpenCLDRCoreZip()
31 buffer, err := io.ReadAll(r)
32 if err != nil {
33 log.Fatal("Could not read zip file")
34 }
35 r.Close()
36 z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
37 if err != nil {
38 log.Fatalf("Could not read zip archive: %v", err)
39 }
40
41 var buf bytes.Buffer
42
43 version := gen.CLDRVersion()
44
45 for _, dtd := range files {
46 for _, f := range z.File {
47 if strings.HasSuffix(f.Name, dtd.file+".dtd") {
48 r, err := f.Open()
49 failOnError(err)
50
51 b := makeBuilder(&buf, dtd)
52 b.parseDTD(r)
53 b.resolve(b.index[dtd.top[0]])
54 b.write()
55 if b.version != "" && version != b.version {
56 println(f.Name)
57 log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
58 }
59 break
60 }
61 }
62 }
63 fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
64 fmt.Fprintf(&buf, "const Version = %q\n", version)
65
66 gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
67 }
68
// failOnError does nothing when err is nil. Otherwise it logs err to
// standard error and exits with status 1. The log line is produced with
// call depth 2, so the reported file and line are those of the caller of
// failOnError rather than of this helper.
func failOnError(err error) {
	if err == nil {
		return
	}
	logger := log.New(os.Stderr, "", log.Lshortfile)
	logger.Output(2, err.Error())
	os.Exit(1)
}
75
76
// dtd describes one DTD file and how Go types are generated from it.
type dtd struct {
	// file is the base name of the DTD; archive entries whose name ends in
	// file+".dtd" are selected for it.
	file string
	// root is the Go type name emitted for the first entry of top.
	root string
	// top lists the elements that are emitted as named top-level types.
	// top[0] is the element from which resolution starts.
	top []string

	// skipElem lists elements that are left out of the generated structs.
	skipElem []string
	// skipAttr lists attributes that are left out of the generated structs.
	skipAttr []string
	// predefined lists elements that are emitted as an embedded <name>Elem
	// field instead of a generated struct.
	predefined []string
	// forceRepeat lists elements that are always emitted as slices,
	// whatever multiplicity the DTD declares.
	forceRepeat []string
}
87
88 var files = []dtd{
89 {
90 file: "ldmlBCP47",
91 root: "LDMLBCP47",
92 top: []string{"ldmlBCP47"},
93 skipElem: []string{
94 "cldrVersion",
95 },
96 },
97 {
98 file: "ldmlSupplemental",
99 root: "SupplementalData",
100 top: []string{"supplementalData"},
101 skipElem: []string{
102 "cldrVersion",
103 },
104 forceRepeat: []string{
105 "plurals",
106 },
107 },
108 {
109 file: "ldml",
110 root: "LDML",
111 top: []string{
112 "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
113 },
114 skipElem: []string{
115 "cp",
116 "special",
117 "fallback",
118 "alias",
119 "default",
120 },
121 skipAttr: []string{
122 "hiraganaQuarternary",
123 },
124 predefined: []string{"rules"},
125 },
126 }
127
// comments maps an element name to the doc comment that is written just
// before the generated Go type for that element. Every value starts and ends
// with a newline.
var comments = map[string]string{
	"ldmlBCP47": "\n" +
		"// LDMLBCP47 holds information on allowable values for various variables in LDML.\n",
	"supplementalData": "\n" +
		"// SupplementalData holds information relevant for internationalization\n" +
		"// and proper use of CLDR, but that is not contained in the locale hierarchy.\n",
	"ldml": "\n" +
		"// LDML is the top-level type for locale-specific data.\n",
	"collation": "\n" +
		"// Collation contains rules that specify a certain sort-order,\n" +
		"// as a tailoring of the root order.\n" +
		"// The parsed rules are obtained by passing a RuleProcessor to Collation's\n" +
		"// Process method.\n",
	"calendar": "\n" +
		"// Calendar specifies the fields used for formatting and parsing dates and times.\n" +
		"// The month and quarter names are identified numerically, starting at 1.\n" +
		"// The day (of the week) names are identified with short strings, since there is\n" +
		"// no universally-accepted numeric designation.\n",
	"dates": "\n" +
		"// Dates contains information regarding the format and parsing of dates and times.\n",
	"localeDisplayNames": "\n" +
		"// LocaleDisplayNames specifies localized display names for scripts, languages,\n" +
		"// countries, currencies, and variants.\n",
	"numbers": "\n" +
		"// Numbers supplies information for formatting and parsing numbers and currencies.\n",
}
162
163 type element struct {
164 name string
165 category string
166 signature string
167
168 attr []*attribute
169 sub []struct {
170 e *element
171 repeat bool
172 }
173
174 resolved bool
175 }
176
// attribute is one attribute declaration taken from an <!ATTLIST ...>
// directive.
type attribute struct {
	// name is the attribute name.
	name string
	// key is the text of the ATTLIST directive that follows the element
	// name.
	key string
	// list holds the tokens extracted from an enumerated value list.
	list []string

	// tag is not assigned anywhere in the code visible in this file.
	// NOTE(review): presumably intended to hold a Go struct tag — confirm.
	tag string
}
184
// Regular expressions used to pick DTD directives apart.
var (
	// reHead matches the start of a directive: group 1 is the keyword
	// (compared against ELEMENT and ATTLIST by parseDTD) and group 2 is the
	// element name.
	reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)

	// reAttr matches the remainder of an ATTLIST directive:
	//   1: attribute name
	//   2: attribute type when given as a single word
	//   3: enumerated values when given as a parenthesized, '|'-separated list
	//   4: upper-case keyword following '#'
	//   5: a quoted character following the '#' keyword
	//   6: a trailing quoted value, quotes included
	//
	// NOTE(review): group 5 uses the class [\.\d+], which matches exactly one
	// character ('.', a digit or '+'); a longer quoted value is captured by
	// group 6 instead. [\.\d]+ may have been intended, but parseDTD stores
	// group 5 as the DTD version, so changing it alters the version check —
	// confirm before touching.
	reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)

	// reElem matches a complete element content model: EMPTY, ANY, or a
	// parenthesized group optionally followed by *, + or ?.
	reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)

	// reToken matches one value (a run of word characters and hyphens) in an
	// enumerated attribute list such as "on | off".
	reToken = regexp.MustCompile(`[\w\-]+`)
)
191
192
193
194 type builder struct {
195 w io.Writer
196 index map[string]*element
197 elem []*element
198 info dtd
199 version string
200 }
201
202 func makeBuilder(w io.Writer, d dtd) builder {
203 return builder{
204 w: w,
205 index: make(map[string]*element),
206 elem: []*element{},
207 info: d,
208 }
209 }
210
211
212 func (b *builder) parseDTD(r io.Reader) {
213 for d := xml.NewDecoder(r); ; {
214 t, err := d.Token()
215 if t == nil {
216 break
217 }
218 failOnError(err)
219 dir, ok := t.(xml.Directive)
220 if !ok {
221 continue
222 }
223 m := reHead.FindSubmatch(dir)
224 dir = dir[len(m[0]):]
225 ename := string(m[2])
226 el, elementFound := b.index[ename]
227 switch string(m[1]) {
228 case "ELEMENT":
229 if elementFound {
230 log.Fatal("parseDTD: duplicate entry for element %q", ename)
231 }
232 m := reElem.FindSubmatch(dir)
233 if m == nil {
234 log.Fatalf("parseDTD: invalid element %q", string(dir))
235 }
236 if len(m[0]) != len(dir) {
237 log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
238 }
239 s := string(m[1])
240 el = &element{
241 name: ename,
242 category: s,
243 }
244 b.index[ename] = el
245 case "ATTLIST":
246 if !elementFound {
247 log.Fatalf("parseDTD: unknown element %q", ename)
248 }
249 s := string(dir)
250 m := reAttr.FindStringSubmatch(s)
251 if m == nil {
252 log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
253 }
254 if m[4] == "FIXED" {
255 b.version = m[5]
256 } else {
257 switch m[1] {
258 case "draft", "references", "alt", "validSubLocales", "standard" :
259 case "type", "choice":
260 default:
261 el.attr = append(el.attr, &attribute{
262 name: m[1],
263 key: s,
264 list: reToken.FindAllString(m[3], -1),
265 })
266 el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
267 }
268 }
269 }
270 }
271 }
272
// reCat matches the next token of an element content model. It skips any
// leading spaces, commas and '|' characters, then optionally captures:
//
//	1: "(", ")", or a name (optionally prefixed with '#', as in #PCDATA)
//	2: a multiplicity suffix: "*", "+", "?" or empty
//
// Every part of the pattern is optional, so it also matches the empty string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
274
275
276
277 func (b *builder) resolve(e *element) {
278 if e.resolved {
279 return
280 }
281 b.elem = append(b.elem, e)
282 e.resolved = true
283 s := e.category
284 found := make(map[string]bool)
285 sequenceStart := []int{}
286 for len(s) > 0 {
287 m := reCat.FindStringSubmatch(s)
288 if m == nil {
289 log.Fatalf("%s: invalid category string %q", e.name, s)
290 }
291 repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
292 switch m[1] {
293 case "":
294 case "(":
295 sequenceStart = append(sequenceStart, len(e.sub))
296 case ")":
297 if len(sequenceStart) == 0 {
298 log.Fatalf("%s: unmatched closing parenthesis", e.name)
299 }
300 for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
301 e.sub[i].repeat = e.sub[i].repeat || repeat
302 }
303 sequenceStart = sequenceStart[:len(sequenceStart)-1]
304 default:
305 if in(b.info.skipElem, m[1]) {
306 } else if sub, ok := b.index[m[1]]; ok {
307 if !found[sub.name] {
308 e.sub = append(e.sub, struct {
309 e *element
310 repeat bool
311 }{sub, repeat})
312 found[sub.name] = true
313 b.resolve(sub)
314 }
315 } else if m[1] == "#PCDATA" || m[1] == "ANY" {
316 } else if m[1] != "EMPTY" {
317 log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
318 }
319 }
320 s = s[len(m[0]):]
321 }
322 }
323
324
// in reports whether s is equal to one of the strings in set.
func in(set []string, s string) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == s {
			return true
		}
	}
	return false
}
333
// repl turns the '-' and '_' separators of a name into spaces so that title
// can treat the parts as separate words.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title converts an XML name to a Go-style identifier: the parts separated
// by '-' or '_' are each capitalized with strings.Title and then joined
// without a separator. The remaining letters keep their case.
func title(s string) string {
	spaced := repl.Replace(s)
	capitalized := strings.Title(spaced)
	return strings.ReplaceAll(capitalized, " ", "")
}
341
342
343 func (b *builder) writeElem(tab int, e *element) {
344 p := func(f string, x ...interface{}) {
345 f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
346 fmt.Fprintf(b.w, f, x...)
347 }
348 if len(e.sub) == 0 && len(e.attr) == 0 {
349 p("Common")
350 return
351 }
352 p("struct {")
353 tab++
354 p("\nCommon")
355 for _, attr := range e.attr {
356 if !in(b.info.skipAttr, attr.name) {
357 p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
358 }
359 }
360 for _, sub := range e.sub {
361 if in(b.info.predefined, sub.e.name) {
362 p("\n%sElem", sub.e.name)
363 continue
364 }
365 if in(b.info.skipElem, sub.e.name) {
366 continue
367 }
368 p("\n%s ", title(sub.e.name))
369 if sub.repeat {
370 p("[]")
371 }
372 p("*")
373 if in(b.info.top, sub.e.name) {
374 p(title(sub.e.name))
375 } else {
376 b.writeElem(tab, sub.e)
377 }
378 p(" `xml:\"%s\"`", sub.e.name)
379 }
380 tab--
381 p("\n}")
382 }
383
384
385 func (b *builder) write() {
386 for i, name := range b.info.top {
387 e := b.index[name]
388 if e != nil {
389 fmt.Fprintf(b.w, comments[name])
390 name := title(e.name)
391 if i == 0 {
392 name = b.info.root
393 }
394 fmt.Fprintf(b.w, "type %s ", name)
395 b.writeElem(0, e)
396 fmt.Fprint(b.w, "\n")
397 }
398 }
399 }
400
View as plain text