...

Source file src/golang.org/x/text/internal/language/language_test.go

Documentation: golang.org/x/text/internal/language

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  import (
     8  	"reflect"
     9  	"testing"
    10  
    11  	"golang.org/x/text/internal/testtext"
    12  )
    13  
    14  func TestTagSize(t *testing.T) {
    15  	id := Tag{}
    16  	typ := reflect.TypeOf(id)
    17  	if typ.Size() > 32 {
    18  		t.Errorf("size of Tag was %d; want <= 32", typ.Size())
    19  	}
    20  }
    21  
    22  func TestIsRoot(t *testing.T) {
    23  	loc := Tag{}
    24  	if !loc.IsRoot() {
    25  		t.Errorf("unspecified should be root.")
    26  	}
    27  	for i, tt := range parseTests() {
    28  		loc, _ := Parse(tt.in)
    29  		undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
    30  		if loc.IsRoot() != undef {
    31  			t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
    32  		}
    33  	}
    34  }
    35  
    36  func TestEquality(t *testing.T) {
    37  	for i, tt := range parseTests() {
    38  		s := tt.in
    39  		tag := Make(s)
    40  		t1 := Make(tag.String())
    41  		if tag != t1 {
    42  			t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
    43  		}
    44  	}
    45  }
    46  
    47  func TestMakeString(t *testing.T) {
    48  	tests := []struct{ in, out string }{
    49  		{"und", "und"},
    50  		{"und", "und-CW"},
    51  		{"nl", "nl-NL"},
    52  		{"de-1901", "nl-1901"},
    53  		{"de-1901", "de-Arab-1901"},
    54  		{"x-a-b", "de-Arab-x-a-b"},
    55  		{"x-a-b", "x-a-b"},
    56  	}
    57  	for i, tt := range tests {
    58  		id, _ := Parse(tt.in)
    59  		mod, _ := Parse(tt.out)
    60  		id.setTagsFrom(mod)
    61  		for j := 0; j < 2; j++ {
    62  			id.RemakeString()
    63  			if str := id.String(); str != tt.out {
    64  				t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tt.out)
    65  			}
    66  		}
    67  		// The bytes to string conversion as used in remakeString
    68  		// occasionally measures as more than one alloc, breaking this test.
    69  		// To alleviate this we set the number of runs to more than 1.
    70  		if n := testtext.AllocsPerRun(8, id.RemakeString); n > 1 {
    71  			t.Errorf("%d: # allocs got %.1f; want <= 1", i, n)
    72  		}
    73  	}
    74  }
    75  
    76  func TestMarshal(t *testing.T) {
    77  	testCases := []string{
    78  		// TODO: these values will change with each CLDR update. This issue
    79  		// will be solved if we decide to fix the indexes.
    80  		"und",
    81  		"ca-ES-valencia",
    82  		"ca-ES-valencia-u-va-posix",
    83  		"ca-ES-valencia-u-co-phonebk",
    84  		"ca-ES-valencia-u-co-phonebk-va-posix",
    85  		"x-klingon",
    86  		"en-US",
    87  		"en-US-u-va-posix",
    88  		"en",
    89  		"en-u-co-phonebk",
    90  		"en-001",
    91  		"sh",
    92  	}
    93  	for _, tc := range testCases {
    94  		var tag Tag
    95  		err := tag.UnmarshalText([]byte(tc))
    96  		if err != nil {
    97  			t.Errorf("UnmarshalText(%q): unexpected error: %v", tc, err)
    98  		}
    99  		b, err := tag.MarshalText()
   100  		if err != nil {
   101  			t.Errorf("MarshalText(%q): unexpected error: %v", tc, err)
   102  		}
   103  		if got := string(b); got != tc {
   104  			t.Errorf("%s: got %q; want %q", tc, got, tc)
   105  		}
   106  	}
   107  }
   108  
   109  func TestParseBase(t *testing.T) {
   110  	tests := []struct {
   111  		in  string
   112  		out string
   113  		ok  bool
   114  	}{
   115  		{"en", "en", true},
   116  		{"EN", "en", true},
   117  		{"nld", "nl", true},
   118  		{"dut", "dut", true},  // bibliographic
   119  		{"aaj", "und", false}, // unknown
   120  		{"qaa", "qaa", true},
   121  		{"a", "und", false},
   122  		{"", "und", false},
   123  		{"aaaa", "und", false},
   124  	}
   125  	for i, tt := range tests {
   126  		x, err := ParseBase(tt.in)
   127  		if x.String() != tt.out || err == nil != tt.ok {
   128  			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
   129  		}
   130  		if y, _, _ := Make(tt.out).Raw(); x != y {
   131  			t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
   132  		}
   133  	}
   134  }
   135  
   136  func TestParseScript(t *testing.T) {
   137  	tests := []struct {
   138  		in  string
   139  		out string
   140  		ok  bool
   141  	}{
   142  		{"Latn", "Latn", true},
   143  		{"zzzz", "Zzzz", true},
   144  		{"zyyy", "Zyyy", true},
   145  		{"Latm", "Zzzz", false},
   146  		{"Zzz", "Zzzz", false},
   147  		{"", "Zzzz", false},
   148  		{"Zzzxx", "Zzzz", false},
   149  	}
   150  	for i, tt := range tests {
   151  		x, err := ParseScript(tt.in)
   152  		if x.String() != tt.out || err == nil != tt.ok {
   153  			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
   154  		}
   155  		if err == nil {
   156  			if _, y, _ := Make("und-" + tt.out).Raw(); x != y {
   157  				t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
   158  			}
   159  		}
   160  	}
   161  }
   162  
   163  func TestEncodeM49(t *testing.T) {
   164  	tests := []struct {
   165  		m49  int
   166  		code string
   167  		ok   bool
   168  	}{
   169  		{1, "001", true},
   170  		{840, "US", true},
   171  		{899, "ZZ", false},
   172  	}
   173  	for i, tt := range tests {
   174  		if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
   175  			t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
   176  		}
   177  	}
   178  	for i := 1; i <= 1000; i++ {
   179  		if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
   180  			t.Errorf("%d has no error, but maps to undefined region", i)
   181  		}
   182  	}
   183  }
   184  
   185  func TestParseRegion(t *testing.T) {
   186  	tests := []struct {
   187  		in  string
   188  		out string
   189  		ok  bool
   190  	}{
   191  		{"001", "001", true},
   192  		{"840", "US", true},
   193  		{"899", "ZZ", false},
   194  		{"USA", "US", true},
   195  		{"US", "US", true},
   196  		{"BC", "ZZ", false},
   197  		{"C", "ZZ", false},
   198  		{"CCCC", "ZZ", false},
   199  		{"01", "ZZ", false},
   200  	}
   201  	for i, tt := range tests {
   202  		r, err := ParseRegion(tt.in)
   203  		if r.String() != tt.out || err == nil != tt.ok {
   204  			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
   205  		}
   206  		if err == nil {
   207  			if _, _, y := Make("und-" + tt.out).Raw(); r != y {
   208  				t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
   209  			}
   210  		}
   211  	}
   212  }
   213  
   214  func TestIsCountry(t *testing.T) {
   215  	tests := []struct {
   216  		reg     string
   217  		country bool
   218  	}{
   219  		{"US", true},
   220  		{"001", false},
   221  		{"958", false},
   222  		{"419", false},
   223  		{"203", true},
   224  		{"020", true},
   225  		{"900", false},
   226  		{"999", false},
   227  		{"QO", false},
   228  		{"EU", false},
   229  		{"AA", false},
   230  		{"XK", true},
   231  	}
   232  	for i, tt := range tests {
   233  		r, _ := getRegionID([]byte(tt.reg))
   234  		if r.IsCountry() != tt.country {
   235  			t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
   236  		}
   237  	}
   238  }
   239  
   240  func TestIsGroup(t *testing.T) {
   241  	tests := []struct {
   242  		reg   string
   243  		group bool
   244  	}{
   245  		{"US", false},
   246  		{"001", true},
   247  		{"958", false},
   248  		{"419", true},
   249  		{"203", false},
   250  		{"020", false},
   251  		{"900", false},
   252  		{"999", false},
   253  		{"QO", true},
   254  		{"EU", true},
   255  		{"AA", false},
   256  		{"XK", false},
   257  	}
   258  	for i, tt := range tests {
   259  		r, _ := getRegionID([]byte(tt.reg))
   260  		if r.IsGroup() != tt.group {
   261  			t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
   262  		}
   263  	}
   264  }
   265  
   266  func TestContains(t *testing.T) {
   267  	tests := []struct {
   268  		enclosing, contained string
   269  		contains             bool
   270  	}{
   271  		// A region contains itself.
   272  		{"US", "US", true},
   273  		{"001", "001", true},
   274  
   275  		// Direct containment.
   276  		{"001", "002", true},
   277  		{"039", "XK", true},
   278  		{"150", "XK", true},
   279  		{"EU", "AT", true},
   280  		{"QO", "AQ", true},
   281  
   282  		// Indirect containemnt.
   283  		{"001", "US", true},
   284  		{"001", "419", true},
   285  		{"001", "013", true},
   286  
   287  		// No containment.
   288  		{"US", "001", false},
   289  		{"155", "EU", false},
   290  	}
   291  	for i, tt := range tests {
   292  		enc, _ := getRegionID([]byte(tt.enclosing))
   293  		con, _ := getRegionID([]byte(tt.contained))
   294  		r := enc
   295  		if got := r.Contains(con); got != tt.contains {
   296  			t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
   297  		}
   298  	}
   299  }
   300  
   301  func TestRegionCanonicalize(t *testing.T) {
   302  	for i, tt := range []struct{ in, out string }{
   303  		{"UK", "GB"},
   304  		{"TP", "TL"},
   305  		{"QU", "EU"},
   306  		{"SU", "SU"},
   307  		{"VD", "VN"},
   308  		{"DD", "DE"},
   309  	} {
   310  		r := MustParseRegion(tt.in)
   311  		want := MustParseRegion(tt.out)
   312  		if got := r.Canonicalize(); got != want {
   313  			t.Errorf("%d: got %v; want %v", i, got, want)
   314  		}
   315  	}
   316  }
   317  
   318  func TestRegionTLD(t *testing.T) {
   319  	for _, tt := range []struct {
   320  		in, out string
   321  		ok      bool
   322  	}{
   323  		{"EH", "EH", true},
   324  		{"FR", "FR", true},
   325  		{"TL", "TL", true},
   326  
   327  		// In ccTLD before in ISO.
   328  		{"GG", "GG", true},
   329  
   330  		// Non-standard assignment of ccTLD to ISO code.
   331  		{"GB", "UK", true},
   332  
   333  		// Exceptionally reserved in ISO and valid ccTLD.
   334  		{"UK", "UK", true},
   335  		{"AC", "AC", true},
   336  		{"EU", "EU", true},
   337  		{"SU", "SU", true},
   338  
   339  		// Exceptionally reserved in ISO and invalid ccTLD.
   340  		{"CP", "ZZ", false},
   341  		{"DG", "ZZ", false},
   342  		{"EA", "ZZ", false},
   343  		{"FX", "ZZ", false},
   344  		{"IC", "ZZ", false},
   345  		{"TA", "ZZ", false},
   346  
   347  		// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
   348  		// it is still being phased out.
   349  		{"AN", "AN", true},
   350  		{"TP", "TP", true},
   351  
   352  		// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
   353  		// Defined in package language as it has a mapping in CLDR.
   354  		{"BU", "ZZ", false},
   355  		{"CS", "ZZ", false},
   356  		{"NT", "ZZ", false},
   357  		{"YU", "ZZ", false},
   358  		{"ZR", "ZZ", false},
   359  		// Not defined in package: SF.
   360  
   361  		// Indeterminately reserved in ISO.
   362  		// Defined in package language as it has a legacy mapping in CLDR.
   363  		{"DY", "ZZ", false},
   364  		{"RH", "ZZ", false},
   365  		{"VD", "ZZ", false},
   366  		// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
   367  		// RN, RP, WG, WL, WV, and YV.
   368  
   369  		// Not assigned in ISO, but legacy definitions in CLDR.
   370  		{"DD", "ZZ", false},
   371  		{"YD", "ZZ", false},
   372  
   373  		// Normal mappings but somewhat special status in ccTLD.
   374  		{"BL", "BL", true},
   375  		{"MF", "MF", true},
   376  		{"BV", "BV", true},
   377  		{"SJ", "SJ", true},
   378  
   379  		// Have values when normalized, but not as is.
   380  		{"QU", "ZZ", false},
   381  
   382  		// ISO Private Use.
   383  		{"AA", "ZZ", false},
   384  		{"QM", "ZZ", false},
   385  		{"QO", "ZZ", false},
   386  		{"XA", "ZZ", false},
   387  		{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
   388  	} {
   389  		if tt.in == "" {
   390  			continue
   391  		}
   392  
   393  		r := MustParseRegion(tt.in)
   394  		var want Region
   395  		if tt.out != "ZZ" {
   396  			want = MustParseRegion(tt.out)
   397  		}
   398  		tld, err := r.TLD()
   399  		if got := err == nil; got != tt.ok {
   400  			t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
   401  		}
   402  		if tld != want {
   403  			t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
   404  		}
   405  	}
   406  }
   407  
   408  func TestTypeForKey(t *testing.T) {
   409  	tests := []struct{ key, in, out string }{
   410  		{"co", "en", ""},
   411  		{"co", "en-u-abc", ""},
   412  		{"co", "en-u-co-phonebk", "phonebk"},
   413  		{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
   414  		{"co", "x-foo-u-co-phonebk", ""},
   415  		{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
   416  		{"kc", "cmn-u-co-stroke", ""},
   417  	}
   418  	for _, tt := range tests {
   419  		if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
   420  			t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
   421  		}
   422  	}
   423  }
   424  
   425  func TestSetTypeForKey(t *testing.T) {
   426  	tests := []struct {
   427  		key, value, in, out string
   428  		err                 bool
   429  	}{
   430  		// replace existing value
   431  		{"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
   432  		{"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
   433  		{"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
   434  		{"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
   435  		{"co", "pinyin", "en-u-co-x-x", "en-u-co-pinyin-x-x", false},
   436  		{"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
   437  		{"nu", "arabic", "en-u-co-phonebk-nu", "en-u-co-phonebk-nu-arabic", false},
   438  		// add to existing -u extension
   439  		{"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
   440  		{"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
   441  		{"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
   442  		{"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
   443  		{"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
   444  		// remove pair
   445  		{"co", "", "en-u-co-phonebk", "en", false},
   446  		{"co", "", "en-u-co", "en", false},
   447  		{"co", "", "en-u-co-v", "en", false},
   448  		{"co", "", "en-u-co-v-", "en", false},
   449  		{"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
   450  		{"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
   451  		{"co", "", "en-u-co-nu-arabic", "en-u-nu-arabic", false},
   452  		{"co", "", "en", "en", false},
   453  		// add -u extension
   454  		{"co", "pinyin", "en", "en-u-co-pinyin", false},
   455  		{"co", "pinyin", "und", "und-u-co-pinyin", false},
   456  		{"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
   457  		{"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
   458  		{"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
   459  		{"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
   460  		{"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
   461  		// error on invalid values
   462  		{"co", "pinyinxxx", "en", "en", true},
   463  		{"co", "piny.n", "en", "en", true},
   464  		{"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
   465  		{"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
   466  		{"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
   467  		{"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
   468  		{"col", "pinyin", "en", "en", true},
   469  		{"co", "cu", "en", "en", true},
   470  		// error when setting on a private use tag
   471  		{"co", "phonebook", "x-foo", "x-foo", true},
   472  	}
   473  	for i, tt := range tests {
   474  		tag := Make(tt.in)
   475  		if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
   476  			t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
   477  		} else if (err != nil) != tt.err {
   478  			t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
   479  		} else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
   480  			t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
   481  		}
   482  		if len(tag.String()) <= 3 {
   483  			// Simulate a tag for which the string has not been set.
   484  			tag.str, tag.pExt, tag.pVariant = "", 0, 0
   485  			if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
   486  				if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
   487  					t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
   488  				}
   489  			}
   490  		}
   491  	}
   492  }
   493  
   494  func TestFindKeyAndType(t *testing.T) {
   495  	// out is either the matched type in case of a match or the original
   496  	// string up till the insertion point.
   497  	tests := []struct {
   498  		key     string
   499  		hasExt  bool
   500  		in, out string
   501  	}{
   502  		// Don't search past a private use extension.
   503  		{"co", false, "en-x-foo-u-co-pinyin", "en"},
   504  		{"co", false, "x-foo-u-co-pinyin", ""},
   505  		{"co", false, "en-s-fff-x-foo", "en-s-fff"},
   506  		// Insertion points in absence of -u extension.
   507  		{"cu", false, "en", ""}, // t.str is ""
   508  		{"cu", false, "en-v-va", "en"},
   509  		{"cu", false, "en-a-va", "en-a-va"},
   510  		{"cu", false, "en-a-va-v-va", "en-a-va"},
   511  		{"cu", false, "en-x-a", "en"},
   512  		// Tags with the -u extension.
   513  		{"nu", true, "en-u-cu-nu", "en-u-cu"},
   514  		{"cu", true, "en-u-cu-nu", "en-u"},
   515  		{"co", true, "en-u-co-standard", "standard"},
   516  		{"co", true, "yue-u-co-pinyin", "pinyin"},
   517  		{"co", true, "en-u-co-abc", "abc"},
   518  		{"co", true, "en-u-co-abc-def", "abc-def"},
   519  		{"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
   520  		{"co", true, "en-u-co-standard-nu-arab", "standard"},
   521  		{"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
   522  		// Insertion points.
   523  		{"cu", true, "en-u-co-standard", "en-u-co-standard"},
   524  		{"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
   525  		{"cu", true, "en-u-co-abc", "en-u-co-abc"},
   526  		{"cu", true, "en-u-nu-arabic", "en-u"},
   527  		{"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
   528  	}
   529  	for i, tt := range tests {
   530  		start, sep, end, hasExt := Make(tt.in).findTypeForKey(tt.key)
   531  		if sep != end {
   532  			res := tt.in[sep:end]
   533  			if res != tt.out {
   534  				t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out)
   535  			}
   536  		} else {
   537  			if hasExt != tt.hasExt {
   538  				t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
   539  				continue
   540  			}
   541  			if tt.in[:start] != tt.out {
   542  				t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, tt.in[:start], tt.out)
   543  			}
   544  		}
   545  	}
   546  }
   547  
   548  func TestParent(t *testing.T) {
   549  	tests := []struct{ in, out string }{
   550  		// Strip variants and extensions first
   551  		{"de-u-co-phonebk", "de"},
   552  		{"de-1994", "de"},
   553  		{"de-Latn-1994", "de"}, // remove superfluous script.
   554  
   555  		// Ensure the canonical Tag for an entry is in the chain for base-script
   556  		// pairs.
   557  		{"zh-Hans", "zh"},
   558  
   559  		// Skip the script if it is the maximized version. CLDR files for the
   560  		// skipped tag are always empty.
   561  		{"zh-Hans-TW", "zh"},
   562  		{"zh-Hans-CN", "zh"},
   563  
   564  		// Insert the script if the maximized script is not the same as the
   565  		// maximized script of the base language.
   566  		{"zh-TW", "zh-Hant"},
   567  		{"zh-HK", "zh-Hant"},
   568  		{"zh-Hant-TW", "zh-Hant"},
   569  		{"zh-Hant-HK", "zh-Hant"},
   570  
   571  		// Non-default script skips to und.
   572  		// CLDR
   573  		{"az-Cyrl", "und"},
   574  		{"bs-Cyrl", "und"},
   575  		{"en-Dsrt", "und"},
   576  		{"ha-Arab", "und"},
   577  		{"mn-Mong", "und"},
   578  		{"pa-Arab", "und"},
   579  		{"shi-Latn", "und"},
   580  		{"sr-Latn", "und"},
   581  		{"uz-Arab", "und"},
   582  		{"uz-Cyrl", "und"},
   583  		{"vai-Latn", "und"},
   584  		{"zh-Hant", "und"},
   585  		// extra
   586  		{"nl-Cyrl", "und"},
   587  
   588  		// World english inherits from en-001.
   589  		{"en-150", "en-001"},
   590  		{"en-AU", "en-001"},
   591  		{"en-BE", "en-001"},
   592  		{"en-GG", "en-001"},
   593  		{"en-GI", "en-001"},
   594  		{"en-HK", "en-001"},
   595  		{"en-IE", "en-001"},
   596  		{"en-IM", "en-001"},
   597  		{"en-IN", "en-001"},
   598  		{"en-JE", "en-001"},
   599  		{"en-MT", "en-001"},
   600  		{"en-NZ", "en-001"},
   601  		{"en-PK", "en-001"},
   602  		{"en-SG", "en-001"},
   603  
   604  		// Spanish in Latin-American countries have es-419 as parent.
   605  		{"es-AR", "es-419"},
   606  		{"es-BO", "es-419"},
   607  		{"es-CL", "es-419"},
   608  		{"es-CO", "es-419"},
   609  		{"es-CR", "es-419"},
   610  		{"es-CU", "es-419"},
   611  		{"es-DO", "es-419"},
   612  		{"es-EC", "es-419"},
   613  		{"es-GT", "es-419"},
   614  		{"es-HN", "es-419"},
   615  		{"es-MX", "es-419"},
   616  		{"es-NI", "es-419"},
   617  		{"es-PA", "es-419"},
   618  		{"es-PE", "es-419"},
   619  		{"es-PR", "es-419"},
   620  		{"es-PY", "es-419"},
   621  		{"es-SV", "es-419"},
   622  		{"es-US", "es-419"},
   623  		{"es-UY", "es-419"},
   624  		{"es-VE", "es-419"},
   625  		// exceptions (according to CLDR)
   626  		{"es-CW", "es"},
   627  
   628  		// Inherit from pt-PT, instead of pt for these countries.
   629  		{"pt-AO", "pt-PT"},
   630  		{"pt-CV", "pt-PT"},
   631  		{"pt-GW", "pt-PT"},
   632  		{"pt-MO", "pt-PT"},
   633  		{"pt-MZ", "pt-PT"},
   634  		{"pt-ST", "pt-PT"},
   635  		{"pt-TL", "pt-PT"},
   636  	}
   637  	for _, tt := range tests {
   638  		tag := MustParse(tt.in)
   639  		if p := MustParse(tt.out); p != tag.Parent() {
   640  			t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
   641  		}
   642  	}
   643  }
   644  
   645  var (
   646  	// Tags without error that don't need to be changed.
   647  	benchBasic = []string{
   648  		"en",
   649  		"en-Latn",
   650  		"en-GB",
   651  		"za",
   652  		"zh-Hant",
   653  		"zh",
   654  		"zh-HK",
   655  		"ar-MK",
   656  		"en-CA",
   657  		"fr-CA",
   658  		"fr-CH",
   659  		"fr",
   660  		"lv",
   661  		"he-IT",
   662  		"tlh",
   663  		"ja",
   664  		"ja-Jpan",
   665  		"ja-Jpan-JP",
   666  		"de-1996",
   667  		"de-CH",
   668  		"sr",
   669  		"sr-Latn",
   670  	}
   671  	// Tags with extensions, not changes required.
   672  	benchExt = []string{
   673  		"x-a-b-c-d",
   674  		"x-aa-bbbb-cccccccc-d",
   675  		"en-x_cc-b-bbb-a-aaa",
   676  		"en-c_cc-b-bbb-a-aaa-x-x",
   677  		"en-u-co-phonebk",
   678  		"en-Cyrl-u-co-phonebk",
   679  		"en-US-u-co-phonebk-cu-xau",
   680  		"en-nedix-u-co-phonebk",
   681  		"en-t-t0-abcd",
   682  		"en-t-nl-latn",
   683  		"en-t-t0-abcd-x-a",
   684  		"en_t_pt_MLt",
   685  		"en-t-fr-est",
   686  	}
   687  	// Change, but not memory allocation required.
   688  	benchSimpleChange = []string{
   689  		"EN",
   690  		"i-klingon",
   691  		"en-latn",
   692  		"zh-cmn-Hans-CN",
   693  		"iw-NL",
   694  	}
   695  	// Change and memory allocation required.
   696  	benchChangeAlloc = []string{
   697  		"en-c_cc-b-bbb-a-aaa",
   698  		"en-u-cu-xua-co-phonebk",
   699  		"en-u-cu-xua-co-phonebk-a-cd",
   700  		"en-u-def-abc-cu-xua-co-phonebk",
   701  		"en-t-en-Cyrl-NL-1994",
   702  		"en-t-en-Cyrl-NL-1994-t0-abc-def",
   703  	}
   704  	// Tags that result in errors.
   705  	benchErr = []string{
   706  		// IllFormed
   707  		"x_A.-B-C_D",
   708  		"en-u-cu-co-phonebk",
   709  		"en-u-cu-xau-co",
   710  		"en-t-nl-abcd",
   711  		// Invalid
   712  		"xx",
   713  		"nl-Uuuu",
   714  		"nl-QB",
   715  	}
   716  	benchChange = append(benchSimpleChange, benchChangeAlloc...)
   717  	benchAll    = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
   718  )
   719  
   720  func doParse(b *testing.B, tag []string) {
   721  	for i := 0; i < b.N; i++ {
   722  		// Use the modulo instead of looping over all tags so that we get a somewhat
   723  		// meaningful ns/op.
   724  		Parse(tag[i%len(tag)])
   725  	}
   726  }
   727  
   728  func BenchmarkParse(b *testing.B) {
   729  	doParse(b, benchAll)
   730  }
   731  
   732  func BenchmarkParseBasic(b *testing.B) {
   733  	doParse(b, benchBasic)
   734  }
   735  
   736  func BenchmarkParseError(b *testing.B) {
   737  	doParse(b, benchErr)
   738  }
   739  
   740  func BenchmarkParseSimpleChange(b *testing.B) {
   741  	doParse(b, benchSimpleChange)
   742  }
   743  
   744  func BenchmarkParseChangeAlloc(b *testing.B) {
   745  	doParse(b, benchChangeAlloc)
   746  }
   747  

View as plain text