...

Source file src/golang.org/x/text/internal/language/compact/parse_test.go

Documentation: golang.org/x/text/internal/language/compact

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package compact
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  
    11  	"golang.org/x/text/internal/language"
    12  )
    13  
// errSyntax is a package-local alias for language.ErrSyntax.
// NOTE(review): it is not referenced in this part of the file; presumably
// other tests in the package compare parse errors against it — confirm.
var errSyntax = language.ErrSyntax
    15  
    16  type parseTest struct {
    17  	i                    int // the index of this test
    18  	in                   string
    19  	lang, script, region string
    20  	variants, ext        string
    21  	extList              []string // only used when more than one extension is present
    22  	invalid              bool
    23  	rewrite              bool // special rewrite not handled by parseTag
    24  	changed              bool // string needed to be reformatted
    25  }
    26  
    27  func parseTests() []parseTest {
    28  	tests := []parseTest{
    29  		{in: "root", lang: "und"},
    30  		{in: "und", lang: "und"},
    31  		{in: "en", lang: "en"},
    32  
    33  		{in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
    34  		{in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
    35  		{in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
    36  
    37  		{in: "xy", lang: "und", invalid: true},
    38  		{in: "en-ZY", lang: "en", invalid: true},
    39  		{in: "gsw", lang: "gsw"},
    40  		{in: "sr_Latn", lang: "sr", script: "Latn"},
    41  		{in: "af-Arab", lang: "af", script: "Arab"},
    42  		{in: "nl-BE", lang: "nl", region: "BE"},
    43  		{in: "es-419", lang: "es", region: "419"},
    44  		{in: "und-001", lang: "und", region: "001"},
    45  		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
    46  		// Variants
    47  		{in: "de-1901", lang: "de", variants: "1901"},
    48  		// Accept with unsuppressed script.
    49  		{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
    50  		// Specialized.
    51  		{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
    52  		{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
    53  		{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
    54  		{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
    55  		{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
    56  		// Maximum number of variants while adhering to prefix rules.
    57  		{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
    58  
    59  		// Sorting.
    60  		{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
    61  		{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
    62  		{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
    63  
    64  		// Duplicates variants are removed, but not an error.
    65  		{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
    66  
    67  		// Variants that do not have correct prefixes. We still accept these.
    68  		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
    69  		{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
    70  		{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
    71  		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
    72  
    73  		// Invalid variant.
    74  		{in: "de-1902", lang: "de", variants: "", invalid: true},
    75  
    76  		{in: "EN_CYRL", lang: "en", script: "Cyrl"},
    77  		// private use and extensions
    78  		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
    79  		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
    80  		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
    81  		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
    82  		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
    83  		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
    84  		{in: "en-v-c", lang: "en", ext: "", invalid: true},
    85  		{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
    86  		{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
    87  		{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
    88  		{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
    89  		{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
    90  		{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
    91  		{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
    92  		{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
    93  		{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
    94  		{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
    95  		{in: "en-u-c", lang: "en", ext: "", invalid: true},
    96  		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
    97  		{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
    98  		{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
    99  		{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
   100  		{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
   101  		{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
   102  		{in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
   103  		{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
   104  		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
   105  		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
   106  		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
   107  		{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
   108  		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
   109  		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
   110  		{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
   111  		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
   112  		// Invalid "u" extension. Drop invalid parts.
   113  		{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
   114  		{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
   115  		// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
   116  		// TODO: Consider eliminating duplicates and returning an error.
   117  		{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
   118  		{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
   119  		{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
   120  		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
   121  		// Not necessary to have changed here.
   122  		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
   123  		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
   124  		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
   125  		{in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
   126  		{in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
   127  		{in: "fr-est", lang: "et", changed: true},
   128  		{in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
   129  		{in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
   130  		// invalid
   131  		{in: "", lang: "und", invalid: true},
   132  		{in: "-", lang: "und", invalid: true},
   133  		{in: "x", lang: "und", invalid: true},
   134  		{in: "x-", lang: "und", invalid: true},
   135  		{in: "x--", lang: "und", invalid: true},
   136  		{in: "a-a-b-c-d", lang: "und", invalid: true},
   137  		{in: "en-", lang: "en", invalid: true},
   138  		{in: "enne-", lang: "und", invalid: true},
   139  		{in: "en.", lang: "und", invalid: true},
   140  		{in: "en.-latn", lang: "und", invalid: true},
   141  		{in: "en.-en", lang: "en", invalid: true},
   142  		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
   143  		{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
   144  		// TODO: check key-value validity
   145  		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
   146  		{in: "en-t-abcd", lang: "en", invalid: true},
   147  		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
   148  		// rewrites (more tests in TestGrandfathered)
   149  		{in: "zh-min-nan", lang: "nan"},
   150  		{in: "zh-yue", lang: "yue"},
   151  		{in: "zh-xiang", lang: "hsn", rewrite: true},
   152  		{in: "zh-guoyu", lang: "cmn", rewrite: true},
   153  		{in: "iw", lang: "iw"},
   154  		{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
   155  		{in: "i-klingon", lang: "tlh", rewrite: true},
   156  	}
   157  	for i, tt := range tests {
   158  		tests[i].i = i
   159  		if tt.extList != nil {
   160  			tests[i].ext = strings.Join(tt.extList, "-")
   161  		}
   162  		if tt.ext != "" && tt.extList == nil {
   163  			tests[i].extList = []string{tt.ext}
   164  		}
   165  	}
   166  	return tests
   167  }
   168  
   169  // partChecks runs checks for each part by calling the function returned by f.
   170  func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
   171  	for i, tt := range parseTests() {
   172  		tag, skip := f(&tt)
   173  		if skip {
   174  			continue
   175  		}
   176  		if l, _ := language.ParseBase(tt.lang); l != tag.Tag().LangID {
   177  			t.Errorf("%d: lang was %q; want %q", i, tag.Tag().LangID, l)
   178  		}
   179  		if sc, _ := language.ParseScript(tt.script); sc != tag.Tag().ScriptID {
   180  			t.Errorf("%d: script was %q; want %q", i, tag.Tag().ScriptID, sc)
   181  		}
   182  		if r, _ := language.ParseRegion(tt.region); r != tag.Tag().RegionID {
   183  			t.Errorf("%d: region was %q; want %q", i, tag.Tag().RegionID, r)
   184  		}
   185  		v := tag.Tag().Variants()
   186  		if v != "" {
   187  			v = v[1:]
   188  		}
   189  		if v != tt.variants {
   190  			t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
   191  		}
   192  		if e := strings.Join(tag.Tag().Extensions(), "-"); e != tt.ext {
   193  			t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
   194  		}
   195  	}
   196  }
   197  
   198  func mk(s string) Tag {
   199  	tag, _ := language.Parse(s)
   200  	return Make(tag)
   201  }
   202  

View as plain text