...

Source file src/golang.org/x/text/unicode/cldr/makexml.go

Documentation: golang.org/x/text/unicode/cldr

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  // This tool generates types for the various XML formats of CLDR.
     8  package main
     9  
    10  import (
    11  	"archive/zip"
    12  	"bytes"
    13  	"encoding/xml"
    14  	"flag"
    15  	"fmt"
    16  	"io"
    17  	"log"
    18  	"os"
    19  	"regexp"
    20  	"strings"
    21  
    22  	"golang.org/x/text/internal/gen"
    23  )
    24  
    25  var outputFile = flag.String("output", "xml.go", "output file name")
    26  
    27  func main() {
    28  	flag.Parse()
    29  
    30  	r := gen.OpenCLDRCoreZip()
    31  	buffer, err := io.ReadAll(r)
    32  	if err != nil {
    33  		log.Fatal("Could not read zip file")
    34  	}
    35  	r.Close()
    36  	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
    37  	if err != nil {
    38  		log.Fatalf("Could not read zip archive: %v", err)
    39  	}
    40  
    41  	var buf bytes.Buffer
    42  
    43  	version := gen.CLDRVersion()
    44  
    45  	for _, dtd := range files {
    46  		for _, f := range z.File {
    47  			if strings.HasSuffix(f.Name, dtd.file+".dtd") {
    48  				r, err := f.Open()
    49  				failOnError(err)
    50  
    51  				b := makeBuilder(&buf, dtd)
    52  				b.parseDTD(r)
    53  				b.resolve(b.index[dtd.top[0]])
    54  				b.write()
    55  				if b.version != "" && version != b.version {
    56  					println(f.Name)
    57  					log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
    58  				}
    59  				break
    60  			}
    61  		}
    62  	}
    63  	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
    64  	fmt.Fprintf(&buf, "const Version = %q\n", version)
    65  
    66  	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
    67  }
    68  
    69  func failOnError(err error) {
    70  	if err != nil {
    71  		log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
    72  		os.Exit(1)
    73  	}
    74  }
    75  
// dtd holds the configuration data for one DTD type: which DTD file to
// read and how the Go types generated from it are shaped.
type dtd struct {
	file string   // base file name
	root string   // Go name of the root XML element
	top  []string // create a different type for this section

	skipElem    []string // hard-coded or deprecated elements
	skipAttr    []string // attributes to exclude
	predefined  []string // hard-coded elements exist of the form <name>Elem
	forceRepeat []string // elements to make slices despite DTD
}
    87  
// files lists the DTDs to generate types for. main processes the entries
// in order, matching each against archive entries named <file>.dtd.
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}
   127  
// comments maps the XML name of a top-level element to the doc comment
// that write emits immediately before the generated type for that element.
// Elements without an entry get no comment.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order. 
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	// NOTE(review): "dates" is not listed in any dtd.top in this file, so
	// this entry appears to be unused — confirm.
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
   162  
// element describes one XML element declared in a DTD. parseDTD fills in
// name, category, attr and signature; resolve fills in sub and resolved.
type element struct {
	name      string // XML element name
	category  string // content model as written in the DTD: the elements contained by this element
	signature string // category + attrKey*

	attr []*attribute // attributes supported by this element.
	sub  []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}

	resolved bool // prevent multiple resolutions of this element.
}
   176  
// attribute describes one XML attribute declared by an ATTLIST directive.
type attribute struct {
	name string   // XML attribute name
	key  string   // text of the ATTLIST directive following the element name
	list []string // tokens extracted from the enumerated value list, if any

	tag string // Go tag
}
   184  
   185  var (
   186  	reHead  = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
   187  	reAttr  = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
   188  	reElem  = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
   189  	reToken = regexp.MustCompile(`\w\-`)
   190  )
   191  
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
	w       io.Writer           // destination of the generated Go code
	index   map[string]*element // elements declared so far, keyed by XML element name
	elem    []*element          // elements in the order resolve first visited them
	info    dtd                 // configuration of the DTD being processed
	version string              // value recorded by parseDTD from the last #FIXED attribute seen
}
   201  
   202  func makeBuilder(w io.Writer, d dtd) builder {
   203  	return builder{
   204  		w:     w,
   205  		index: make(map[string]*element),
   206  		elem:  []*element{},
   207  		info:  d,
   208  	}
   209  }
   210  
   211  // parseDTD parses a DTD file.
   212  func (b *builder) parseDTD(r io.Reader) {
   213  	for d := xml.NewDecoder(r); ; {
   214  		t, err := d.Token()
   215  		if t == nil {
   216  			break
   217  		}
   218  		failOnError(err)
   219  		dir, ok := t.(xml.Directive)
   220  		if !ok {
   221  			continue
   222  		}
   223  		m := reHead.FindSubmatch(dir)
   224  		dir = dir[len(m[0]):]
   225  		ename := string(m[2])
   226  		el, elementFound := b.index[ename]
   227  		switch string(m[1]) {
   228  		case "ELEMENT":
   229  			if elementFound {
   230  				log.Fatal("parseDTD: duplicate entry for element %q", ename)
   231  			}
   232  			m := reElem.FindSubmatch(dir)
   233  			if m == nil {
   234  				log.Fatalf("parseDTD: invalid element %q", string(dir))
   235  			}
   236  			if len(m[0]) != len(dir) {
   237  				log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
   238  			}
   239  			s := string(m[1])
   240  			el = &element{
   241  				name:     ename,
   242  				category: s,
   243  			}
   244  			b.index[ename] = el
   245  		case "ATTLIST":
   246  			if !elementFound {
   247  				log.Fatalf("parseDTD: unknown element %q", ename)
   248  			}
   249  			s := string(dir)
   250  			m := reAttr.FindStringSubmatch(s)
   251  			if m == nil {
   252  				log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
   253  			}
   254  			if m[4] == "FIXED" {
   255  				b.version = m[5]
   256  			} else {
   257  				switch m[1] {
   258  				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
   259  				case "type", "choice":
   260  				default:
   261  					el.attr = append(el.attr, &attribute{
   262  						name: m[1],
   263  						key:  s,
   264  						list: reToken.FindAllString(m[3], -1),
   265  					})
   266  					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
   267  				}
   268  			}
   269  		}
   270  	}
   271  }
   272  
// reCat matches the next token of an element's content model: any leading
// separators (space, ',' or '|'), then optionally '(' or ')' or a name
// (possibly prefixed with '#') in group 1, and an optional '*', '+' or '?'
// modifier in group 2. Every part is optional, so it can match the empty
// string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
   274  
   275  // resolve takes a parsed element and converts it into structured data
   276  // that can be used to generate the XML code.
   277  func (b *builder) resolve(e *element) {
   278  	if e.resolved {
   279  		return
   280  	}
   281  	b.elem = append(b.elem, e)
   282  	e.resolved = true
   283  	s := e.category
   284  	found := make(map[string]bool)
   285  	sequenceStart := []int{}
   286  	for len(s) > 0 {
   287  		m := reCat.FindStringSubmatch(s)
   288  		if m == nil {
   289  			log.Fatalf("%s: invalid category string %q", e.name, s)
   290  		}
   291  		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
   292  		switch m[1] {
   293  		case "":
   294  		case "(":
   295  			sequenceStart = append(sequenceStart, len(e.sub))
   296  		case ")":
   297  			if len(sequenceStart) == 0 {
   298  				log.Fatalf("%s: unmatched closing parenthesis", e.name)
   299  			}
   300  			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
   301  				e.sub[i].repeat = e.sub[i].repeat || repeat
   302  			}
   303  			sequenceStart = sequenceStart[:len(sequenceStart)-1]
   304  		default:
   305  			if in(b.info.skipElem, m[1]) {
   306  			} else if sub, ok := b.index[m[1]]; ok {
   307  				if !found[sub.name] {
   308  					e.sub = append(e.sub, struct {
   309  						e      *element
   310  						repeat bool
   311  					}{sub, repeat})
   312  					found[sub.name] = true
   313  					b.resolve(sub)
   314  				}
   315  			} else if m[1] == "#PCDATA" || m[1] == "ANY" {
   316  			} else if m[1] != "EMPTY" {
   317  				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
   318  			}
   319  		}
   320  		s = s[len(m[0]):]
   321  	}
   322  }
   323  
   324  // return true if s is contained in set.
   325  func in(set []string, s string) bool {
   326  	for _, v := range set {
   327  		if v == s {
   328  			return true
   329  		}
   330  	}
   331  	return false
   332  }
   333  
   334  var repl = strings.NewReplacer("-", " ", "_", " ")
   335  
   336  // title puts the first character or each character following '_' in title case and
   337  // removes all occurrences of '_'.
   338  func title(s string) string {
   339  	return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
   340  }
   341  
   342  // writeElem generates Go code for a single element, recursively.
   343  func (b *builder) writeElem(tab int, e *element) {
   344  	p := func(f string, x ...interface{}) {
   345  		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
   346  		fmt.Fprintf(b.w, f, x...)
   347  	}
   348  	if len(e.sub) == 0 && len(e.attr) == 0 {
   349  		p("Common")
   350  		return
   351  	}
   352  	p("struct {")
   353  	tab++
   354  	p("\nCommon")
   355  	for _, attr := range e.attr {
   356  		if !in(b.info.skipAttr, attr.name) {
   357  			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
   358  		}
   359  	}
   360  	for _, sub := range e.sub {
   361  		if in(b.info.predefined, sub.e.name) {
   362  			p("\n%sElem", sub.e.name)
   363  			continue
   364  		}
   365  		if in(b.info.skipElem, sub.e.name) {
   366  			continue
   367  		}
   368  		p("\n%s ", title(sub.e.name))
   369  		if sub.repeat {
   370  			p("[]")
   371  		}
   372  		p("*")
   373  		if in(b.info.top, sub.e.name) {
   374  			p(title(sub.e.name))
   375  		} else {
   376  			b.writeElem(tab, sub.e)
   377  		}
   378  		p(" `xml:\"%s\"`", sub.e.name)
   379  	}
   380  	tab--
   381  	p("\n}")
   382  }
   383  
   384  // write generates the Go XML code.
   385  func (b *builder) write() {
   386  	for i, name := range b.info.top {
   387  		e := b.index[name]
   388  		if e != nil {
   389  			fmt.Fprintf(b.w, comments[name])
   390  			name := title(e.name)
   391  			if i == 0 {
   392  				name = b.info.root
   393  			}
   394  			fmt.Fprintf(b.w, "type %s ", name)
   395  			b.writeElem(0, e)
   396  			fmt.Fprint(b.w, "\n")
   397  		}
   398  	}
   399  }
   400  

View as plain text