...

Source file src/golang.org/x/text/internal/number/gen.go

Documentation: golang.org/x/text/internal/number

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  
     7  package main
     8  
     9  import (
    10  	"flag"
    11  	"fmt"
    12  	"log"
    13  	"reflect"
    14  	"strings"
    15  	"unicode/utf8"
    16  
    17  	"golang.org/x/text/internal/gen"
    18  	"golang.org/x/text/internal/language"
    19  	"golang.org/x/text/internal/language/compact"
    20  	"golang.org/x/text/internal/number"
    21  	"golang.org/x/text/internal/stringset"
    22  	"golang.org/x/text/unicode/cldr"
    23  )
    24  
    25  var (
    26  	test = flag.Bool("test", false,
    27  		"test existing tables; can be used to compare web data with package data.")
    28  	outputFile     = flag.String("output", "tables.go", "output file")
    29  	outputTestFile = flag.String("testoutput", "data_test.go", "output file")
    30  
    31  	draft = flag.String("draft",
    32  		"contributed",
    33  		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
    34  )
    35  
    36  func main() {
    37  	gen.Init()
    38  
    39  	const pkg = "number"
    40  
    41  	gen.Repackage("gen_common.go", "common.go", pkg)
    42  	// Read the CLDR zip file.
    43  	r := gen.OpenCLDRCoreZip()
    44  	defer r.Close()
    45  
    46  	d := &cldr.Decoder{}
    47  	d.SetDirFilter("supplemental", "main")
    48  	d.SetSectionFilter("numbers", "numberingSystem")
    49  	data, err := d.DecodeZip(r)
    50  	if err != nil {
    51  		log.Fatalf("DecodeZip: %v", err)
    52  	}
    53  
    54  	w := gen.NewCodeWriter()
    55  	defer w.WriteGoFile(*outputFile, pkg)
    56  
    57  	fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
    58  
    59  	gen.WriteCLDRVersion(w)
    60  
    61  	genNumSystem(w, data)
    62  	genSymbols(w, data)
    63  	genFormats(w, data)
    64  }
    65  
    66  var systemMap = map[string]system{"latn": 0}
    67  
    68  func getNumberSystem(str string) system {
    69  	ns, ok := systemMap[str]
    70  	if !ok {
    71  		log.Fatalf("No index for numbering system %q", str)
    72  	}
    73  	return ns
    74  }
    75  
    76  func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
    77  	numSysData := []systemData{
    78  		{digitSize: 1, zero: [4]byte{'0'}},
    79  	}
    80  
    81  	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
    82  		if len(ns.Digits) == 0 {
    83  			continue
    84  		}
    85  		switch ns.Id {
    86  		case "latn":
    87  			// hard-wired
    88  			continue
    89  		case "hanidec":
    90  			// non-consecutive digits: treat as "algorithmic"
    91  			continue
    92  		}
    93  
    94  		zero, sz := utf8.DecodeRuneInString(ns.Digits)
    95  		if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
    96  			log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
    97  		}
    98  
    99  		i := rune(0)
   100  		for _, r := range ns.Digits {
   101  			// Verify that we can do simple math on the UTF-8 byte sequence
   102  			// of zero to get the digit.
   103  			if zero+i != r {
   104  				// Runes not consecutive.
   105  				log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
   106  			}
   107  			i++
   108  		}
   109  		var x [utf8.UTFMax]byte
   110  		utf8.EncodeRune(x[:], zero)
   111  		id := system(len(numSysData))
   112  		systemMap[ns.Id] = id
   113  		numSysData = append(numSysData, systemData{
   114  			id:        id,
   115  			digitSize: byte(sz),
   116  			zero:      x,
   117  		})
   118  	}
   119  	w.WriteVar("numSysData", numSysData)
   120  
   121  	algoID := system(len(numSysData))
   122  	fmt.Fprintln(w, "const (")
   123  	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
   124  		id, ok := systemMap[ns.Id]
   125  		if !ok {
   126  			id = algoID
   127  			systemMap[ns.Id] = id
   128  			algoID++
   129  		}
   130  		fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
   131  	}
   132  	fmt.Fprintln(w, "numNumberSystems")
   133  	fmt.Fprintln(w, ")")
   134  
   135  	fmt.Fprintln(w, "var systemMap = map[string]system{")
   136  	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
   137  		fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
   138  		w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
   139  	}
   140  	fmt.Fprintln(w, "}")
   141  }
   142  
   143  func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
   144  	d, err := cldr.ParseDraft(*draft)
   145  	if err != nil {
   146  		log.Fatalf("invalid draft level: %v", err)
   147  	}
   148  
   149  	nNumberSystems := system(len(systemMap))
   150  
   151  	type symbols [NumSymbolTypes]string
   152  
   153  	type key struct {
   154  		tag    compact.ID
   155  		system system
   156  	}
   157  	symbolMap := map[key]*symbols{}
   158  
   159  	defaults := map[compact.ID]system{}
   160  
   161  	for _, lang := range data.Locales() {
   162  		ldml := data.RawLDML(lang)
   163  		if ldml.Numbers == nil {
   164  			continue
   165  		}
   166  		langIndex, ok := compact.FromTag(language.MustParse(lang))
   167  		if !ok {
   168  			log.Fatalf("No compact index for language %s", lang)
   169  		}
   170  		if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
   171  			defaults[langIndex] = getNumberSystem(d[0].Data())
   172  		}
   173  
   174  		syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
   175  		syms.SelectDraft(d)
   176  
   177  		getFirst := func(name string, x interface{}) string {
   178  			v := reflect.ValueOf(x)
   179  			slice := cldr.MakeSlice(x)
   180  			slice.SelectAnyOf("alt", "", "alt")
   181  			if reflect.Indirect(v).Len() == 0 {
   182  				return ""
   183  			} else if reflect.Indirect(v).Len() > 1 {
   184  				log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
   185  			}
   186  			return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
   187  		}
   188  
   189  		for _, sym := range ldml.Numbers.Symbols {
   190  			if sym.NumberSystem == "" {
   191  				// This is just linking the default of root to "latn".
   192  				continue
   193  			}
   194  			symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
   195  				SymDecimal:                getFirst("decimal", &sym.Decimal),
   196  				SymGroup:                  getFirst("group", &sym.Group),
   197  				SymList:                   getFirst("list", &sym.List),
   198  				SymPercentSign:            getFirst("percentSign", &sym.PercentSign),
   199  				SymPlusSign:               getFirst("plusSign", &sym.PlusSign),
   200  				SymMinusSign:              getFirst("minusSign", &sym.MinusSign),
   201  				SymExponential:            getFirst("exponential", &sym.Exponential),
   202  				SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
   203  				SymPerMille:               getFirst("perMille", &sym.PerMille),
   204  				SymInfinity:               getFirst("infinity", &sym.Infinity),
   205  				SymNan:                    getFirst("nan", &sym.Nan),
   206  				SymTimeSeparator:          getFirst("timeSeparator", &sym.TimeSeparator),
   207  			}
   208  		}
   209  	}
   210  
   211  	// Expand all values.
   212  	for k, syms := range symbolMap {
   213  		for t := SymDecimal; t < NumSymbolTypes; t++ {
   214  			p := k.tag
   215  			for syms[t] == "" {
   216  				p = p.Parent()
   217  				if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
   218  					syms[t] = (*pSyms)[t]
   219  					break
   220  				}
   221  				if p == 0 /* und */ {
   222  					// Default to root, latn.
   223  					syms[t] = (*symbolMap[key{}])[t]
   224  				}
   225  			}
   226  		}
   227  	}
   228  
   229  	// Unique the symbol sets and write the string data.
   230  	m := map[symbols]int{}
   231  	sb := stringset.NewBuilder()
   232  
   233  	symIndex := [][NumSymbolTypes]byte{}
   234  
   235  	for ns := system(0); ns < nNumberSystems; ns++ {
   236  		for _, l := range data.Locales() {
   237  			langIndex, _ := compact.FromTag(language.MustParse(l))
   238  			s := symbolMap[key{langIndex, ns}]
   239  			if s == nil {
   240  				continue
   241  			}
   242  			if _, ok := m[*s]; !ok {
   243  				m[*s] = len(symIndex)
   244  				sb.Add(s[:]...)
   245  				var x [NumSymbolTypes]byte
   246  				for i := SymDecimal; i < NumSymbolTypes; i++ {
   247  					x[i] = byte(sb.Index((*s)[i]))
   248  				}
   249  				symIndex = append(symIndex, x)
   250  			}
   251  		}
   252  	}
   253  	w.WriteVar("symIndex", symIndex)
   254  	w.WriteVar("symData", sb.Set())
   255  
   256  	// resolveSymbolIndex gets the index from the closest matching locale,
   257  	// including the locale itself.
   258  	resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset {
   259  		for {
   260  			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
   261  				return symOffset(m[*sym])
   262  			}
   263  			if langIndex == 0 {
   264  				return 0 // und, latn
   265  			}
   266  			langIndex = langIndex.Parent()
   267  		}
   268  	}
   269  
   270  	// Create an index with the symbols for each locale for the latn numbering
   271  	// system. If this is not the default, or the only one, for a locale, we
   272  	// will overwrite the value later.
   273  	var langToDefaults [compact.NumCompactTags]symOffset
   274  	for _, l := range data.Locales() {
   275  		langIndex, _ := compact.FromTag(language.MustParse(l))
   276  		langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
   277  	}
   278  
   279  	// Delete redundant entries.
   280  	for _, l := range data.Locales() {
   281  		langIndex, _ := compact.FromTag(language.MustParse(l))
   282  		def := defaults[langIndex]
   283  		syms := symbolMap[key{langIndex, def}]
   284  		if syms == nil {
   285  			continue
   286  		}
   287  		for ns := system(0); ns < nNumberSystems; ns++ {
   288  			if ns == def {
   289  				continue
   290  			}
   291  			if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
   292  				delete(symbolMap, key{langIndex, ns})
   293  			}
   294  		}
   295  	}
   296  
   297  	// Create a sorted list of alternatives per language. This will only need to
   298  	// be referenced if a user specified an alternative numbering system.
   299  	var langToAlt []altSymData
   300  	for _, l := range data.Locales() {
   301  		langIndex, _ := compact.FromTag(language.MustParse(l))
   302  		start := len(langToAlt)
   303  		if start >= hasNonLatnMask {
   304  			log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask)
   305  		}
   306  		// Create the entry for the default value.
   307  		def := defaults[langIndex]
   308  		langToAlt = append(langToAlt, altSymData{
   309  			compactTag: langIndex,
   310  			system:     def,
   311  			symIndex:   resolveSymbolIndex(langIndex, def),
   312  		})
   313  
   314  		for ns := system(0); ns < nNumberSystems; ns++ {
   315  			if def == ns {
   316  				continue
   317  			}
   318  			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
   319  				langToAlt = append(langToAlt, altSymData{
   320  					compactTag: langIndex,
   321  					system:     ns,
   322  					symIndex:   resolveSymbolIndex(langIndex, ns),
   323  				})
   324  			}
   325  		}
   326  		if def == 0 && len(langToAlt) == start+1 {
   327  			// No additional data: erase the entry.
   328  			langToAlt = langToAlt[:start]
   329  		} else {
   330  			// Overwrite the entry in langToDefaults.
   331  			langToDefaults[langIndex] = hasNonLatnMask | symOffset(start)
   332  		}
   333  	}
   334  	w.WriteComment(`
   335  langToDefaults maps a compact language index to the default numbering system
   336  and default symbol set`)
   337  	w.WriteVar("langToDefaults", langToDefaults)
   338  
   339  	w.WriteComment(`
   340  langToAlt is a list of numbering system and symbol set pairs, sorted and
   341  marked by compact language index.`)
   342  	w.WriteVar("langToAlt", langToAlt)
   343  }
   344  
   345  // genFormats generates the lookup table for decimal, scientific and percent
   346  // patterns.
   347  //
   348  // CLDR allows for patterns to be different per language for different numbering
   349  // systems. In practice the patterns are set to be consistent for a language
   350  // independent of the numbering system. genFormats verifies that no language
   351  // deviates from this.
   352  func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
   353  	d, err := cldr.ParseDraft(*draft)
   354  	if err != nil {
   355  		log.Fatalf("invalid draft level: %v", err)
   356  	}
   357  
   358  	// Fill the first slot with a dummy so we can identify unspecified tags.
   359  	formats := []number.Pattern{{}}
   360  	patterns := map[string]int{}
   361  
   362  	// TODO: It would be possible to eliminate two of these slices by having
   363  	// another indirection and store a reference to the combination of patterns.
   364  	decimal := make([]byte, compact.NumCompactTags)
   365  	scientific := make([]byte, compact.NumCompactTags)
   366  	percent := make([]byte, compact.NumCompactTags)
   367  
   368  	for _, lang := range data.Locales() {
   369  		ldml := data.RawLDML(lang)
   370  		if ldml.Numbers == nil {
   371  			continue
   372  		}
   373  		langIndex, ok := compact.FromTag(language.MustParse(lang))
   374  		if !ok {
   375  			log.Fatalf("No compact index for language %s", lang)
   376  		}
   377  		type patternSlice []*struct {
   378  			cldr.Common
   379  			Numbers string `xml:"numbers,attr"`
   380  			Count   string `xml:"count,attr"`
   381  		}
   382  
   383  		add := func(name string, tags []byte, ps patternSlice) {
   384  			sl := cldr.MakeSlice(&ps)
   385  			sl.SelectDraft(d)
   386  			if len(ps) == 0 {
   387  				return
   388  			}
   389  			if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
   390  				log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
   391  			}
   392  			s := ps[0].Data()
   393  
   394  			index, ok := patterns[s]
   395  			if !ok {
   396  				nf, err := number.ParsePattern(s)
   397  				if err != nil {
   398  					log.Fatal(err)
   399  				}
   400  				index = len(formats)
   401  				patterns[s] = index
   402  				formats = append(formats, *nf)
   403  			}
   404  			tags[langIndex] = byte(index)
   405  		}
   406  
   407  		for _, df := range ldml.Numbers.DecimalFormats {
   408  			for _, l := range df.DecimalFormatLength {
   409  				if l.Type != "" {
   410  					continue
   411  				}
   412  				for _, f := range l.DecimalFormat {
   413  					add("decimal", decimal, f.Pattern)
   414  				}
   415  			}
   416  		}
   417  		for _, df := range ldml.Numbers.ScientificFormats {
   418  			for _, l := range df.ScientificFormatLength {
   419  				if l.Type != "" {
   420  					continue
   421  				}
   422  				for _, f := range l.ScientificFormat {
   423  					add("scientific", scientific, f.Pattern)
   424  				}
   425  			}
   426  		}
   427  		for _, df := range ldml.Numbers.PercentFormats {
   428  			for _, l := range df.PercentFormatLength {
   429  				if l.Type != "" {
   430  					continue
   431  				}
   432  				for _, f := range l.PercentFormat {
   433  					add("percent", percent, f.Pattern)
   434  				}
   435  			}
   436  		}
   437  	}
   438  
   439  	// Complete the parent tag array to reflect inheritance. An index of 0
   440  	// indicates an unspecified value.
   441  	for _, data := range [][]byte{decimal, scientific, percent} {
   442  		for i := range data {
   443  			p := compact.ID(i)
   444  			for ; data[p] == 0; p = p.Parent() {
   445  			}
   446  			data[i] = data[p]
   447  		}
   448  	}
   449  	w.WriteVar("tagToDecimal", decimal)
   450  	w.WriteVar("tagToScientific", scientific)
   451  	w.WriteVar("tagToPercent", percent)
   452  
   453  	value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
   454  	// Break up the lines. This won't give ideal perfect formatting, but it is
   455  	// better than one huge line.
   456  	value = strings.Replace(value, ", ", ",\n", -1)
   457  	fmt.Fprintf(w, "var formats = %s\n", value)
   458  }
   459  

View as plain text