...

Source file src/golang.org/x/text/language/parse_test.go

Documentation: golang.org/x/text/language

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  
    11  	"golang.org/x/text/internal/language"
    12  )
    13  
    14  // equalTags compares language, script and region subtags only.
    15  func (t Tag) equalTags(a Tag) bool {
    16  	return t.lang() == a.lang() &&
    17  		t.script() == a.script() &&
    18  		t.region() == a.region()
    19  }
    20  
// errSyntax is a package-local shorthand for language.ErrSyntax; TestErrors
// uses it as an expected error value.
var errSyntax = language.ErrSyntax
    22  
// parseTest is one entry of the table returned by parseTests: an input
// string together with the parts the resulting tag is expected to have.
type parseTest struct {
	i                    int // the index of this test
	// in is the input string handed to the parser.
	in                   string
	// lang, script and region are the expected subtags, given as strings;
	// partChecks parses them before comparing against the tag.
	lang, script, region string
	// variants is the expected variant list without a leading separator.
	// ext is the expected extensions joined with "-"; parseTests fills it
	// in from extList when extList is set.
	variants, ext        string
	extList              []string // only used when more than one extension is present
	// NOTE(review): invalid presumably marks inputs that are expected to
	// produce a parse error; nothing in the visible part of this file reads
	// it — confirm against other users.
	invalid              bool
	rewrite              bool // special rewrite not handled by parseTag
	changed              bool // string needed to be reformatted
}
    33  
// parseTests returns the table of parse test cases shared by the tests in
// this file. Before returning, every entry is stamped with its index and its
// ext and extList fields are made consistent with each other, so callers can
// use whichever form they need.
func parseTests() []parseTest {
	tests := []parseTest{
		{in: "root", lang: "und"},
		{in: "und", lang: "und"},
		{in: "en", lang: "en"},

		{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
		{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
		{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},

		{in: "xy", lang: "und", invalid: true},
		{in: "en-ZY", lang: "en", invalid: true},
		{in: "gsw", lang: "gsw"},
		{in: "sr_Latn", lang: "sr", script: "Latn"},
		{in: "af-Arab", lang: "af", script: "Arab"},
		{in: "nl-BE", lang: "nl", region: "BE"},
		{in: "es-419", lang: "es", region: "419"},
		{in: "und-001", lang: "und", region: "001"},
		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
		// Variants
		{in: "de-1901", lang: "de", variants: "1901"},
		// Accept with unsuppressed script.
		{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
		// Specialized.
		{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
		{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
		{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
		{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
		{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
		// Maximum number of variants while adhering to prefix rules.
		{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},

		// Sorting.
		{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
		{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
		{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},

		// Duplicate variants are removed, but are not an error.
		{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},

		// Variants that do not have correct prefixes. We still accept these.
		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
		{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
		{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},

		// Invalid variant.
		{in: "de-1902", lang: "de", variants: "", invalid: true},

		{in: "EN_CYRL", lang: "en", script: "Cyrl"},
		// private use and extensions
		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
		{in: "en-v-c", lang: "en", ext: "", invalid: true},
		{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
		{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
		{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
		{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
		{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
		{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
		{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
		{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
		{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
		{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
		{in: "en-u-c", lang: "en", ext: "", invalid: true},
		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
		{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", invalid: true},
		{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
		{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
		{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
		{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
		// Invalid "u" extension. Drop invalid parts.
		{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, invalid: true, changed: true},
		{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, invalid: true},
		// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
		// TODO: Consider eliminating duplicates and returning an error.
		{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
		{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
		{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
		// Not necessary to have changed here.
		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
		{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
		{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
		{in: "fr-est", lang: "et", changed: true},
		{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
		{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
		// invalid
		{in: "", lang: "und", invalid: true},
		{in: "-", lang: "und", invalid: true},
		{in: "x", lang: "und", invalid: true},
		{in: "x-", lang: "und", invalid: true},
		{in: "x--", lang: "und", invalid: true},
		{in: "a-a-b-c-d", lang: "und", invalid: true},
		{in: "en-", lang: "en", invalid: true},
		{in: "enne-", lang: "und", invalid: true},
		{in: "en.", lang: "und", invalid: true},
		{in: "en.-latn", lang: "und", invalid: true},
		{in: "en.-en", lang: "en", invalid: true},
		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
		{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
		// TODO: check key-value validity
		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
		{in: "en-t-abcd", lang: "en", invalid: true},
		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
		// rewrites (more tests in TestGrandfathered)
		{in: "zh-min-nan", lang: "nan"},
		{in: "zh-yue", lang: "yue"},
		{in: "zh-xiang", lang: "hsn", rewrite: true},
		{in: "zh-guoyu", lang: "cmn", rewrite: true},
		{in: "iw", lang: "iw"},
		{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
		{in: "i-klingon", lang: "tlh", rewrite: true},
	}
	// Normalize the table. tt is a copy of the element, so every update is
	// written through tests[i] rather than tt.
	for i, tt := range tests {
		tests[i].i = i
		// An explicit extList takes precedence: ext becomes its "-"-joined form.
		if tt.extList != nil {
			tests[i].ext = strings.Join(tt.extList, "-")
		}
		// Otherwise a non-empty ext is mirrored into a one-element extList.
		if tt.ext != "" && tt.extList == nil {
			tests[i].extList = []string{tt.ext}
		}
	}
	return tests
}
   175  
   176  // partChecks runs checks for each part by calling the function returned by f.
   177  func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
   178  	for i, tt := range parseTests() {
   179  		tag, skip := f(&tt)
   180  		if skip {
   181  			continue
   182  		}
   183  		if l, _ := language.ParseBase(tt.lang); l != tag.lang() {
   184  			t.Errorf("%d: lang was %q; want %q", i, tag.lang(), l)
   185  		}
   186  		if sc, _ := language.ParseScript(tt.script); sc != tag.script() {
   187  			t.Errorf("%d: script was %q; want %q", i, tag.script(), sc)
   188  		}
   189  		if r, _ := language.ParseRegion(tt.region); r != tag.region() {
   190  			t.Errorf("%d: region was %q; want %q", i, tag.region(), r)
   191  		}
   192  		v := tag.tag().Variants()
   193  		if v != "" {
   194  			v = v[1:]
   195  		}
   196  		if v != tt.variants {
   197  			t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
   198  		}
   199  		if e := strings.Join(tag.tag().Extensions(), "-"); e != tt.ext {
   200  			t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
   201  		}
   202  	}
   203  }
   204  
   205  func TestParse(t *testing.T) {
   206  	partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
   207  		id, _ = Raw.Parse(tt.in)
   208  		return id, false
   209  	})
   210  }
   211  
   212  func TestErrors(t *testing.T) {
   213  	mkInvalid := func(s string) error {
   214  		return language.NewValueError([]byte(s))
   215  	}
   216  	tests := []struct {
   217  		in  string
   218  		out error
   219  	}{
   220  		// invalid subtags.
   221  		{"ac", mkInvalid("ac")},
   222  		{"AC", mkInvalid("ac")},
   223  		{"aa-Uuuu", mkInvalid("Uuuu")},
   224  		{"aa-AB", mkInvalid("AB")},
   225  		// ill-formed wins over invalid.
   226  		{"ac-u", errSyntax},
   227  		{"ac-u-ca", mkInvalid("ac")},
   228  		{"ac-u-ca-co-pinyin", mkInvalid("ac")},
   229  		{"noob", errSyntax},
   230  	}
   231  	for _, tt := range tests {
   232  		_, err := Parse(tt.in)
   233  		if err != tt.out {
   234  			t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
   235  		}
   236  	}
   237  }
   238  
   239  func TestCompose1(t *testing.T) {
   240  	partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
   241  		l, _ := ParseBase(tt.lang)
   242  		s, _ := ParseScript(tt.script)
   243  		r, _ := ParseRegion(tt.region)
   244  		v := []Variant{}
   245  		for _, x := range strings.Split(tt.variants, "-") {
   246  			p, _ := ParseVariant(x)
   247  			v = append(v, p)
   248  		}
   249  		e := []Extension{}
   250  		for _, x := range tt.extList {
   251  			p, _ := ParseExtension(x)
   252  			e = append(e, p)
   253  		}
   254  		id, _ = Raw.Compose(l, s, r, v, e)
   255  		return id, false
   256  	})
   257  }
   258  
   259  func TestCompose2(t *testing.T) {
   260  	partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
   261  		l, _ := ParseBase(tt.lang)
   262  		s, _ := ParseScript(tt.script)
   263  		r, _ := ParseRegion(tt.region)
   264  		p := []interface{}{l, s, r, s, r, l}
   265  		for _, x := range strings.Split(tt.variants, "-") {
   266  			if x != "" {
   267  				v, _ := ParseVariant(x)
   268  				p = append(p, v)
   269  			}
   270  		}
   271  		for _, x := range tt.extList {
   272  			e, _ := ParseExtension(x)
   273  			p = append(p, e)
   274  		}
   275  		id, _ = Raw.Compose(p...)
   276  		return id, false
   277  	})
   278  }
   279  
   280  func TestCompose3(t *testing.T) {
   281  	partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
   282  		id, _ = Raw.Parse(tt.in)
   283  		id, _ = Raw.Compose(id)
   284  		return id, false
   285  	})
   286  }
   287  
   288  func mk(s string) Tag {
   289  	return Raw.Make(s)
   290  }
   291  
   292  func TestParseAcceptLanguage(t *testing.T) {
   293  	type res struct {
   294  		t Tag
   295  		q float32
   296  	}
   297  	en := []res{{mk("en"), 1.0}}
   298  	tests := []struct {
   299  		out []res
   300  		in  string
   301  		ok  bool
   302  	}{
   303  		{en, "en", true},
   304  		{en, "   en", true},
   305  		{en, "en   ", true},
   306  		{en, "  en  ", true},
   307  		{en, "en,", true},
   308  		{en, ",en", true},
   309  		{en, ",,,en,,,", true},
   310  		{en, ",en;q=1", true},
   311  
   312  		// We allow an empty input, contrary to spec.
   313  		{nil, "", true},
   314  		{[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
   315  
   316  		// errors
   317  		{nil, ";", false},
   318  		{nil, "$", false},
   319  		{nil, "e;", false},
   320  		{nil, "x;", false},
   321  		{nil, "x", false},
   322  		{nil, "ac", false}, // non-existing language
   323  		{nil, "aa;q", false},
   324  		{nil, "aa;q=", false},
   325  		{nil, "aa;q=.", false},
   326  		{nil, "00-t-0o", false},
   327  
   328  		// odd fallbacks
   329  		{
   330  			[]res{{mk("en"), 0.1}},
   331  			" english ;q=.1",
   332  			true,
   333  		},
   334  		{
   335  			[]res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
   336  			" italian, deutsch, french",
   337  			true,
   338  		},
   339  
   340  		// lists
   341  		{
   342  			[]res{{mk("en"), 0.1}},
   343  			"en;q=.1",
   344  			true,
   345  		},
   346  		{
   347  			[]res{{mk("mul"), 1.0}},
   348  			"*",
   349  			true,
   350  		},
   351  		{
   352  			[]res{{mk("en"), 1.0}, {mk("de"), 1.0}},
   353  			"en,de",
   354  			true,
   355  		},
   356  		{
   357  			[]res{{mk("en"), 1.0}, {mk("de"), .5}},
   358  			"en,de;q=0.5",
   359  			true,
   360  		},
   361  		{
   362  			[]res{{mk("de"), 0.8}, {mk("en"), 0.5}},
   363  			"  en ;   q    =   0.5    ,  , de;q=0.8",
   364  			true,
   365  		},
   366  		{
   367  			[]res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
   368  			"en,de,fr,i-klingon",
   369  			true,
   370  		},
   371  		// sorting
   372  		{
   373  			[]res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
   374  			"en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
   375  			true,
   376  		},
   377  		// dropping
   378  		{
   379  			[]res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
   380  			"en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
   381  			true,
   382  		},
   383  	}
   384  	for i, tt := range tests {
   385  		tags, qs, e := ParseAcceptLanguage(tt.in)
   386  		if e == nil != tt.ok {
   387  			t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
   388  		}
   389  		for j, tag := range tags {
   390  			if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
   391  				t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
   392  				break
   393  			}
   394  		}
   395  	}
   396  }
   397  
   398  func TestParseAcceptLanguageTooBig(t *testing.T) {
   399  	s := strings.Repeat("en-x-a-", 333)
   400  	_, _, err := ParseAcceptLanguage(s)
   401  	if err != language.ErrSyntax {
   402  		t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, language.ErrSyntax)
   403  	}
   404  	s += "en-x-a"
   405  	_, _, err = ParseAcceptLanguage(s)
   406  	if err != errTagListTooLarge {
   407  		t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, errTagListTooLarge)
   408  	}
   409  }
   410  

View as plain text