...

Source file src/golang.org/x/text/language/display/maketables.go

Documentation: golang.org/x/text/language/display

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  // Generator for display name tables.
     8  
     9  package main
    10  
    11  import (
    12  	"bytes"
    13  	"flag"
    14  	"fmt"
    15  	"log"
    16  	"reflect"
    17  	"sort"
    18  	"strings"
    19  
    20  	"golang.org/x/text/internal/gen"
    21  	"golang.org/x/text/language"
    22  	"golang.org/x/text/unicode/cldr"
    23  )
    24  
    25  var (
    26  	test = flag.Bool("test", false,
    27  		"test existing tables; can be used to compare web data with package data.")
    28  	outputFile = flag.String("output", "tables.go", "output file")
    29  
    30  	stats = flag.Bool("stats", false, "prints statistics to stderr")
    31  
    32  	short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
    33  	draft = flag.String("draft",
    34  		"contributed",
    35  		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
    36  	pkg = flag.String("package",
    37  		"display",
    38  		"the name of the package in which the generated file is to be included")
    39  
    40  	tags = newTagSet("tags",
    41  		[]language.Tag{},
    42  		"space-separated list of tags to include or empty for all")
    43  	dict = newTagSet("dict",
    44  		dictTags(),
    45  		"space-separated list or tags for which to include a Dictionary. "+
    46  			`"" means the common list from go.text/language.`)
    47  )
    48  
    49  func dictTags() (tag []language.Tag) {
    50  	// TODO: replace with language.Common.Tags() once supported.
    51  	const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
    52  		"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
    53  		"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
    54  		"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
    55  		"zh zh-Hans zh-Hant zu"
    56  
    57  	for _, s := range strings.Split(str, " ") {
    58  		tag = append(tag, language.MustParse(s))
    59  	}
    60  	return tag
    61  }
    62  
    63  func main() {
    64  	gen.Init()
    65  
    66  	// Read the CLDR zip file.
    67  	r := gen.OpenCLDRCoreZip()
    68  	defer r.Close()
    69  
    70  	d := &cldr.Decoder{}
    71  	d.SetDirFilter("main", "supplemental")
    72  	d.SetSectionFilter("localeDisplayNames")
    73  	data, err := d.DecodeZip(r)
    74  	if err != nil {
    75  		log.Fatalf("DecodeZip: %v", err)
    76  	}
    77  
    78  	w := gen.NewCodeWriter()
    79  	defer w.WriteGoFile(*outputFile, "display")
    80  
    81  	gen.WriteCLDRVersion(w)
    82  
    83  	b := builder{
    84  		w:     w,
    85  		data:  data,
    86  		group: make(map[string]*group),
    87  	}
    88  	b.generate()
    89  }
    90  
    91  const tagForm = language.All
    92  
    93  // tagSet is used to parse command line flags of tags. It implements the
    94  // flag.Value interface.
    95  type tagSet map[language.Tag]bool
    96  
    97  func newTagSet(name string, tags []language.Tag, usage string) tagSet {
    98  	f := tagSet(make(map[language.Tag]bool))
    99  	for _, t := range tags {
   100  		f[t] = true
   101  	}
   102  	flag.Var(f, name, usage)
   103  	return f
   104  }
   105  
   106  // String implements the String method of the flag.Value interface.
   107  func (f tagSet) String() string {
   108  	tags := []string{}
   109  	for t := range f {
   110  		tags = append(tags, t.String())
   111  	}
   112  	sort.Strings(tags)
   113  	return strings.Join(tags, " ")
   114  }
   115  
   116  // Set implements Set from the flag.Value interface.
   117  func (f tagSet) Set(s string) error {
   118  	if s != "" {
   119  		for _, s := range strings.Split(s, " ") {
   120  			if s != "" {
   121  				tag, err := tagForm.Parse(s)
   122  				if err != nil {
   123  					return err
   124  				}
   125  				f[tag] = true
   126  			}
   127  		}
   128  	}
   129  	return nil
   130  }
   131  
   132  func (f tagSet) contains(t language.Tag) bool {
   133  	if len(f) == 0 {
   134  		return true
   135  	}
   136  	return f[t]
   137  }
   138  
   139  // builder is used to create all tables with display name information.
   140  type builder struct {
   141  	w *gen.CodeWriter
   142  
   143  	data *cldr.CLDR
   144  
   145  	fromLocs []string
   146  
   147  	// destination tags for the current locale.
   148  	toTags     []string
   149  	toTagIndex map[string]int
   150  
   151  	// list of supported tags
   152  	supported []language.Tag
   153  
   154  	// key-value pairs per group
   155  	group map[string]*group
   156  
   157  	// statistics
   158  	sizeIndex int // total size of all indexes of headers
   159  	sizeData  int // total size of all data of headers
   160  	totalSize int
   161  }
   162  
   163  type group struct {
   164  	// Maps from a given language to the Namer data for this language.
   165  	lang    map[language.Tag]keyValues
   166  	headers []header
   167  
   168  	toTags        []string
   169  	threeStart    int
   170  	fourPlusStart int
   171  }
   172  
   173  // set sets the typ to the name for locale loc.
   174  func (g *group) set(t language.Tag, typ, name string) {
   175  	kv := g.lang[t]
   176  	if kv == nil {
   177  		kv = make(keyValues)
   178  		g.lang[t] = kv
   179  	}
   180  	if kv[typ] == "" {
   181  		kv[typ] = name
   182  	}
   183  }
   184  
   185  type keyValues map[string]string
   186  
   187  type header struct {
   188  	tag   language.Tag
   189  	data  string
   190  	index []uint16
   191  }
   192  
   193  var versionInfo = `// Version is deprecated. Use CLDRVersion.
   194  const Version = %#v
   195  
   196  `
   197  
   198  var self = language.MustParse("mul")
   199  
   200  // generate builds and writes all tables.
   201  func (b *builder) generate() {
   202  	fmt.Fprintf(b.w, versionInfo, cldr.Version)
   203  
   204  	b.filter()
   205  	b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
   206  		if ldn.Languages != nil {
   207  			for _, v := range ldn.Languages.Language {
   208  				lang := v.Type
   209  				if lang == "root" {
   210  					// We prefer the data from "und"
   211  					// TODO: allow both the data for root and und somehow.
   212  					continue
   213  				}
   214  				tag := tagForm.MustParse(lang)
   215  				if tags.contains(tag) {
   216  					g.set(loc, tag.String(), v.Data())
   217  				}
   218  			}
   219  		}
   220  	})
   221  	b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
   222  		if ldn.Scripts != nil {
   223  			for _, v := range ldn.Scripts.Script {
   224  				code := language.MustParseScript(v.Type)
   225  				if code.IsPrivateUse() { // Qaaa..Qabx
   226  					// TODO: data currently appears to be very meager.
   227  					// Reconsider if we have data for English.
   228  					if loc == language.English {
   229  						log.Fatal("Consider including data for private use scripts.")
   230  					}
   231  					continue
   232  				}
   233  				g.set(loc, code.String(), v.Data())
   234  			}
   235  		}
   236  	})
   237  	b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
   238  		if ldn.Territories != nil {
   239  			for _, v := range ldn.Territories.Territory {
   240  				g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
   241  			}
   242  		}
   243  	})
   244  
   245  	b.makeSupported()
   246  
   247  	b.writeParents()
   248  
   249  	b.writeGroup("lang")
   250  	b.writeGroup("script")
   251  	b.writeGroup("region")
   252  
   253  	b.w.WriteConst("numSupported", len(b.supported))
   254  	buf := bytes.Buffer{}
   255  	for _, tag := range b.supported {
   256  		fmt.Fprint(&buf, tag.String(), "|")
   257  	}
   258  	b.w.WriteConst("supported", buf.String())
   259  
   260  	b.writeDictionaries()
   261  
   262  	b.supported = []language.Tag{self}
   263  
   264  	// Compute the names of locales in their own language. Some of these names
   265  	// may be specified in their parent locales. We iterate the maximum depth
   266  	// of the parent three times to match successive parents of tags until a
   267  	// possible match is found.
   268  	for i := 0; i < 4; i++ {
   269  		b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
   270  			parent := tag
   271  			if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
   272  				parent, _ = language.Raw.Compose(b)
   273  			}
   274  			if ldn.Languages != nil {
   275  				for _, v := range ldn.Languages.Language {
   276  					key := tagForm.MustParse(v.Type)
   277  					saved := key
   278  					if key == parent {
   279  						g.set(self, tag.String(), v.Data())
   280  					}
   281  					for k := 0; k < i; k++ {
   282  						key = key.Parent()
   283  					}
   284  					if key == tag {
   285  						g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
   286  					}
   287  				}
   288  			}
   289  		})
   290  	}
   291  
   292  	b.writeGroup("self")
   293  }
   294  
   295  func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
   296  	b.sizeIndex = 0
   297  	b.sizeData = 0
   298  	b.toTags = nil
   299  	b.fromLocs = nil
   300  	b.toTagIndex = make(map[string]int)
   301  
   302  	g := b.group[name]
   303  	if g == nil {
   304  		g = &group{lang: make(map[language.Tag]keyValues)}
   305  		b.group[name] = g
   306  	}
   307  	for _, loc := range b.data.Locales() {
   308  		// We use RawLDML instead of LDML as we are managing our own inheritance
   309  		// in this implementation.
   310  		ldml := b.data.RawLDML(loc)
   311  
   312  		// We do not support the POSIX variant (it is not a supported BCP 47
   313  		// variant). This locale also doesn't happen to contain any data, so
   314  		// we'll skip it by checking for this.
   315  		tag, err := tagForm.Parse(loc)
   316  		if err != nil {
   317  			if ldml.LocaleDisplayNames != nil {
   318  				log.Fatalf("setData: %v", err)
   319  			}
   320  			continue
   321  		}
   322  		if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
   323  			f(g, tag, ldml.LocaleDisplayNames)
   324  		}
   325  	}
   326  }
   327  
   328  func (b *builder) filter() {
   329  	filter := func(s *cldr.Slice) {
   330  		if *short {
   331  			s.SelectOnePerGroup("alt", []string{"short", ""})
   332  		} else {
   333  			s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
   334  		}
   335  		d, err := cldr.ParseDraft(*draft)
   336  		if err != nil {
   337  			log.Fatalf("filter: %v", err)
   338  		}
   339  		s.SelectDraft(d)
   340  	}
   341  	for _, loc := range b.data.Locales() {
   342  		if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
   343  			if ldn.Languages != nil {
   344  				s := cldr.MakeSlice(&ldn.Languages.Language)
   345  				if filter(&s); len(ldn.Languages.Language) == 0 {
   346  					ldn.Languages = nil
   347  				}
   348  			}
   349  			if ldn.Scripts != nil {
   350  				s := cldr.MakeSlice(&ldn.Scripts.Script)
   351  				if filter(&s); len(ldn.Scripts.Script) == 0 {
   352  					ldn.Scripts = nil
   353  				}
   354  			}
   355  			if ldn.Territories != nil {
   356  				s := cldr.MakeSlice(&ldn.Territories.Territory)
   357  				if filter(&s); len(ldn.Territories.Territory) == 0 {
   358  					ldn.Territories = nil
   359  				}
   360  			}
   361  		}
   362  	}
   363  }
   364  
   365  // makeSupported creates a list of all supported locales.
   366  func (b *builder) makeSupported() {
   367  	// tags across groups
   368  	for _, g := range b.group {
   369  		for t, _ := range g.lang {
   370  			b.supported = append(b.supported, t)
   371  		}
   372  	}
   373  	b.supported = b.supported[:unique(tagsSorter(b.supported))]
   374  
   375  }
   376  
   377  type tagsSorter []language.Tag
   378  
   379  func (a tagsSorter) Len() int           { return len(a) }
   380  func (a tagsSorter) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   381  func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
   382  
   383  func (b *builder) writeGroup(name string) {
   384  	g := b.group[name]
   385  
   386  	for _, kv := range g.lang {
   387  		for t, _ := range kv {
   388  			g.toTags = append(g.toTags, t)
   389  		}
   390  	}
   391  	g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
   392  
   393  	// Allocate header per supported value.
   394  	g.headers = make([]header, len(b.supported))
   395  	for i, sup := range b.supported {
   396  		kv, ok := g.lang[sup]
   397  		if !ok {
   398  			g.headers[i].tag = sup
   399  			continue
   400  		}
   401  		data := []byte{}
   402  		index := make([]uint16, len(g.toTags), len(g.toTags)+1)
   403  		for j, t := range g.toTags {
   404  			index[j] = uint16(len(data))
   405  			data = append(data, kv[t]...)
   406  		}
   407  		index = append(index, uint16(len(data)))
   408  
   409  		// Trim the tail of the index.
   410  		// TODO: indexes can be reduced in size quite a bit more.
   411  		n := len(index)
   412  		for ; n >= 2 && index[n-2] == index[n-1]; n-- {
   413  		}
   414  		index = index[:n]
   415  
   416  		// Workaround for a bug in CLDR 26.
   417  		// See https://unicode.org/cldr/trac/ticket/8042.
   418  		if cldr.Version == "26" && sup.String() == "hsb" {
   419  			data = bytes.Replace(data, []byte{'"'}, nil, 1)
   420  		}
   421  		g.headers[i] = header{sup, string(data), index}
   422  	}
   423  	g.writeTable(b.w, name)
   424  }
   425  
   426  type tagsBySize []string
   427  
   428  func (l tagsBySize) Len() int      { return len(l) }
   429  func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
   430  func (l tagsBySize) Less(i, j int) bool {
   431  	a, b := l[i], l[j]
   432  	// Sort single-tag entries based on size first. Otherwise alphabetic.
   433  	if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
   434  		return len(a) < len(b)
   435  	}
   436  	return a < b
   437  }
   438  
   439  // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
   440  // of tags[i].
   441  func parentIndices(tags []language.Tag) []int16 {
   442  	index := make(map[language.Tag]int16)
   443  	for i, t := range tags {
   444  		index[t] = int16(i)
   445  	}
   446  
   447  	// Construct default parents.
   448  	parents := make([]int16, len(tags))
   449  	for i, t := range tags {
   450  		parents[i] = -1
   451  		for t = t.Parent(); t != language.Und; t = t.Parent() {
   452  			if j, ok := index[t]; ok {
   453  				parents[i] = j
   454  				break
   455  			}
   456  		}
   457  	}
   458  	return parents
   459  }
   460  
   461  func (b *builder) writeParents() {
   462  	parents := parentIndices(b.supported)
   463  	fmt.Fprintf(b.w, "var parents = ")
   464  	b.w.WriteArray(parents)
   465  }
   466  
   467  // writeKeys writes keys to a special index used by the display package.
   468  // tags are assumed to be sorted by length.
   469  func writeKeys(w *gen.CodeWriter, name string, keys []string) {
   470  	w.Size += int(3 * reflect.TypeOf("").Size())
   471  	w.WriteComment("Number of keys: %d", len(keys))
   472  	fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
   473  	for i := 2; i <= 4; i++ {
   474  		sub := []string{}
   475  		for _, t := range keys {
   476  			if len(t) != i {
   477  				break
   478  			}
   479  			sub = append(sub, t)
   480  		}
   481  		s := strings.Join(sub, "")
   482  		w.WriteString(s)
   483  		fmt.Fprintf(w, ",\n")
   484  		keys = keys[len(sub):]
   485  	}
   486  	fmt.Fprintln(w, "\t}")
   487  	if len(keys) > 0 {
   488  		w.Size += int(reflect.TypeOf([]string{}).Size())
   489  		fmt.Fprintf(w, "\t%sTagsLong = ", name)
   490  		w.WriteSlice(keys)
   491  	}
   492  	fmt.Fprintln(w, ")\n")
   493  }
   494  
   495  // identifier creates an identifier from the given tag.
   496  func identifier(t language.Tag) string {
   497  	return strings.Replace(t.String(), "-", "", -1)
   498  }
   499  
   500  func (h *header) writeEntry(w *gen.CodeWriter, name string) {
   501  	if len(dict) > 0 && dict.contains(h.tag) {
   502  		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
   503  		fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
   504  		fmt.Fprintln(w, "\t},")
   505  	} else if len(h.data) == 0 {
   506  		fmt.Fprintln(w, "\t\t{}, //", h.tag)
   507  	} else {
   508  		fmt.Fprintf(w, "\t{ // %s\n", h.tag)
   509  		w.WriteString(h.data)
   510  		fmt.Fprintln(w, ",")
   511  		w.WriteSlice(h.index)
   512  		fmt.Fprintln(w, ",\n\t},")
   513  	}
   514  }
   515  
   516  // write the data for the given header as single entries. The size for this data
   517  // was already accounted for in writeEntry.
   518  func (h *header) writeSingle(w *gen.CodeWriter, name string) {
   519  	if len(dict) > 0 && dict.contains(h.tag) {
   520  		tag := identifier(h.tag)
   521  		w.WriteConst(tag+name+"Str", h.data)
   522  
   523  		// Note that we create a slice instead of an array. If we use an array
   524  		// we need to refer to it as a[:] in other tables, which will cause the
   525  		// array to always be included by the linker. See Issue 7651.
   526  		w.WriteVar(tag+name+"Idx", h.index)
   527  	}
   528  }
   529  
   530  // writeTable writes an entry for a single Namer.
   531  func (g *group) writeTable(w *gen.CodeWriter, name string) {
   532  	start := w.Size
   533  	writeKeys(w, name, g.toTags)
   534  	w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
   535  
   536  	fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
   537  
   538  	title := strings.Title(name)
   539  	for _, h := range g.headers {
   540  		h.writeEntry(w, title)
   541  	}
   542  	fmt.Fprintln(w, "}\n")
   543  
   544  	for _, h := range g.headers {
   545  		h.writeSingle(w, title)
   546  	}
   547  	n := w.Size - start
   548  	fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
   549  }
   550  
   551  func (b *builder) writeDictionaries() {
   552  	fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
   553  	fmt.Fprintln(b.w, "var (")
   554  	parents := parentIndices(b.supported)
   555  
   556  	for i, t := range b.supported {
   557  		if dict.contains(t) {
   558  			ident := identifier(t)
   559  			fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
   560  			if p := parents[i]; p == -1 {
   561  				fmt.Fprintln(b.w, "\t\tnil,")
   562  			} else {
   563  				fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
   564  			}
   565  			fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
   566  			fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
   567  			fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
   568  			fmt.Fprintln(b.w, "\t}")
   569  		}
   570  	}
   571  	fmt.Fprintln(b.w, ")")
   572  
   573  	var s string
   574  	var a []uint16
   575  	sz := reflect.TypeOf(s).Size()
   576  	sz += reflect.TypeOf(a).Size()
   577  	sz *= 3
   578  	sz += reflect.TypeOf(&a).Size()
   579  	n := int(sz) * len(dict)
   580  	fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
   581  
   582  	b.w.Size += n
   583  }
   584  
   585  // unique sorts the given lists and removes duplicate entries by swapping them
   586  // past position k, where k is the number of unique values. It returns k.
   587  func unique(a sort.Interface) int {
   588  	if a.Len() == 0 {
   589  		return 0
   590  	}
   591  	sort.Sort(a)
   592  	k := 1
   593  	for i := 1; i < a.Len(); i++ {
   594  		if a.Less(k-1, i) {
   595  			if k != i {
   596  				a.Swap(k, i)
   597  			}
   598  			k++
   599  		}
   600  	}
   601  	return k
   602  }
   603  

View as plain text