1
2
3
4
5 package language
6
7 import (
8 "bytes"
9 "strings"
10 "testing"
11
12 "golang.org/x/text/internal/tag"
13 )
14
// scanTest describes one scanner test case: an input string, the tokens the
// scanner is expected to yield for it, and whether scanning should finish
// without an error.
type scanTest struct {
	ok  bool     // true if the scanner's err field is expected to be nil after scanning in
	in  string   // input handed to makeScannerString
	tok []string // tokens expected from the scanner, in order
}
20
21 var tests = []scanTest{
22 {true, "", []string{}},
23 {true, "1", []string{"1"}},
24 {true, "en", []string{"en"}},
25 {true, "root", []string{"root"}},
26 {true, "maxchars", []string{"maxchars"}},
27 {false, "bad/", []string{}},
28 {false, "morethan8", []string{}},
29 {false, "-", []string{}},
30 {false, "----", []string{}},
31 {false, "_", []string{}},
32 {true, "en-US", []string{"en", "US"}},
33 {true, "en_US", []string{"en", "US"}},
34 {false, "en-US-", []string{"en", "US"}},
35 {false, "en-US--", []string{"en", "US"}},
36 {false, "en-US---", []string{"en", "US"}},
37 {false, "en--US", []string{"en", "US"}},
38 {false, "-en-US", []string{"en", "US"}},
39 {false, "-en--US-", []string{"en", "US"}},
40 {false, "-en--US-", []string{"en", "US"}},
41 {false, "en-.-US", []string{"en", "US"}},
42 {false, ".-en--US-.", []string{"en", "US"}},
43 {false, "en-u.-US", []string{"en", "US"}},
44 {true, "en-u1-US", []string{"en", "u1", "US"}},
45 {true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
46 {false, "moreThan8-moreThan8-e", []string{"e"}},
47 }
48
49 func TestScan(t *testing.T) {
50 for i, tt := range tests {
51 scan := makeScannerString(tt.in)
52 for j := 0; !scan.done; j++ {
53 if j >= len(tt.tok) {
54 t.Errorf("%d: extra token %q", i, scan.token)
55 } else if tag.Compare(tt.tok[j], scan.token) != 0 {
56 t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j])
57 break
58 }
59 scan.scan()
60 }
61 if s := strings.Join(tt.tok, "-"); tag.Compare(s, bytes.Replace(scan.b, b("_"), b("-"), -1)) != 0 {
62 t.Errorf("%d: input: found %q; want %q", i, scan.b, s)
63 }
64 if (scan.err == nil) != tt.ok {
65 t.Errorf("%d: ok: found %v; want %v", i, scan.err == nil, tt.ok)
66 }
67 }
68 }
69
70 func TestAcceptMinSize(t *testing.T) {
71 for i, tt := range tests {
72
73 for sz := 1; sz <= 8; sz++ {
74 scan := makeScannerString(tt.in)
75 scan.end, scan.next = 0, 0
76 end := scan.acceptMinSize(sz)
77 n := 0
78 for i := 0; i < len(tt.tok) && len(tt.tok[i]) >= sz; i++ {
79 n += len(tt.tok[i])
80 if i > 0 {
81 n++
82 }
83 }
84 if end != n {
85 t.Errorf("%d:%d: found len %d; want %d", i, sz, end, n)
86 }
87 }
88 }
89 }
90
// parseTest is one entry of the table built by parseTests. It couples an
// input string with the parts the parsing tests expect to find in the result.
type parseTest struct {
	i        int      // index of the entry in the table; filled in by parseTests
	in       string   // input string to parse
	lang     string   // expected language subtag
	script   string   // expected script subtag
	region   string   // expected region subtag
	variants string   // expected variants section of the resulting tag string
	ext      string   // expected extensions section; parseTests derives it from extList when that is set
	extList  []string // expected extensions as separate items; defaults to []string{ext} when ext is set
	invalid  bool     // true if Parse is expected to return an error
	rewrite  bool     // entries with this set are skipped by TestParseExtensions and TestParseTag
	changed  bool     // true if the resulting string is expected not to be a prefix of the input
}
101
102 func parseTests() []parseTest {
103 tests := []parseTest{
104 {in: "root", lang: "und"},
105 {in: "und", lang: "und"},
106 {in: "en", lang: "en"},
107 {in: "xy", lang: "und", invalid: true},
108 {in: "en-ZY", lang: "en", invalid: true},
109 {in: "gsw", lang: "gsw"},
110 {in: "sr_Latn", lang: "sr", script: "Latn"},
111 {in: "af-Arab", lang: "af", script: "Arab"},
112 {in: "nl-BE", lang: "nl", region: "BE"},
113 {in: "es-419", lang: "es", region: "419"},
114 {in: "und-001", lang: "und", region: "001"},
115 {in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
116
117 {in: "de-1901", lang: "de", variants: "1901"},
118
119 {in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
120
121 {in: "sl-rozaj", lang: "sl", variants: "rozaj"},
122 {in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
123 {in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
124 {in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
125 {in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
126
127 {in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
128
129
130 {in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
131 {in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
132 {in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
133
134
135 {in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
136
137
138 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
139 {in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
140 {in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
141 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
142
143
144 {in: "de-1902", lang: "de", variants: "", invalid: true},
145
146 {in: "EN_CYRL", lang: "en", script: "Cyrl"},
147
148 {in: "x-a-b-c-d", ext: "x-a-b-c-d"},
149 {in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
150 {in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
151 {in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
152 {in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
153 {in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
154 {in: "en-v-c", lang: "en", ext: "", invalid: true},
155 {in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
156 {in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
157 {in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
158 {in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
159 {in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
160 {in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
161 {in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
162 {in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
163 {in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
164 {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
165 {in: "en-u-c", lang: "en", ext: "", invalid: true},
166 {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
167 {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", changed: true},
168 {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", changed: true},
169 {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
170 {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
171 {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
172 {in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
173 {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
174 {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
175 {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
176 {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
177 {in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
178 {in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
179 {in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
180 {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
181 {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
182 {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, changed: true},
183 {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, changed: true},
184
185 {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
186
187 {in: "en-US-u-cu-xau-cu-eur", lang: "en", region: "US", ext: "u-cu-xau", invalid: true, changed: false},
188 {in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
189 {in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
190 {in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
191
192 {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
193 {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
194 {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
195 {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
196 {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
197 {in: "fr-est", lang: "et", changed: false},
198 {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: false},
199
200
201
202
203
204 {in: "", lang: "und", invalid: true},
205 {in: "-", lang: "und", invalid: true},
206 {in: "x", lang: "und", invalid: true},
207 {in: "x-", lang: "und", invalid: true},
208 {in: "x--", lang: "und", invalid: true},
209 {in: "a-a-b-c-d", lang: "und", invalid: true},
210 {in: "en-", lang: "en", invalid: true},
211 {in: "enne-", lang: "und", invalid: true},
212 {in: "en.", lang: "und", invalid: true},
213 {in: "en.-latn", lang: "und", invalid: true},
214 {in: "en.-en", lang: "en", invalid: true},
215 {in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
216 {in: "a-tooManyChars-c-d", lang: "und", invalid: true},
217
218
219 {in: "en-t-abcd", lang: "en", invalid: true},
220 {in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
221
222 {in: "zh-min-nan", lang: "nan"},
223 {in: "zh-yue", lang: "yue"},
224 {in: "zh-xiang", lang: "hsn", rewrite: true},
225 {in: "zh-guoyu", lang: "cmn", rewrite: true},
226 {in: "iw", lang: "iw"},
227 {in: "sgn-BE-FR", lang: "sfb", rewrite: true},
228 {in: "i-klingon", lang: "tlh", rewrite: true},
229 }
230 for i, tt := range tests {
231 tests[i].i = i
232 if tt.extList != nil {
233 tests[i].ext = strings.Join(tt.extList, "-")
234 }
235 if tt.ext != "" && tt.extList == nil {
236 tests[i].extList = []string{tt.ext}
237 }
238 }
239 return tests
240 }
241
242 func TestParseExtensions(t *testing.T) {
243 for i, tt := range parseTests() {
244 if tt.ext == "" || tt.rewrite {
245 continue
246 }
247 scan := makeScannerString(tt.in)
248 if len(scan.b) > 1 && scan.b[1] != '-' {
249 scan.end = nextExtension(string(scan.b), 0)
250 scan.next = scan.end + 1
251 scan.scan()
252 }
253 start := scan.start
254 scan.toLower(start, len(scan.b))
255 parseExtensions(&scan)
256 ext := string(scan.b[start:])
257 if ext != tt.ext {
258 t.Errorf("%d(%s): ext was %v; want %v", i, tt.in, ext, tt.ext)
259 }
260 if changed := !strings.HasPrefix(tt.in[start:], ext); changed != tt.changed {
261 t.Errorf("%d(%s): changed was %v; want %v", i, tt.in, changed, tt.changed)
262 }
263 }
264 }
265
266
267 func partChecks(t *testing.T, f func(*testing.T, *parseTest) (Tag, bool)) {
268 for i, tt := range parseTests() {
269 t.Run(tt.in, func(t *testing.T) {
270 tag, skip := f(t, &tt)
271 if skip {
272 return
273 }
274 if l, _ := getLangID(b(tt.lang)); l != tag.LangID {
275 t.Errorf("%d: lang was %q; want %q", i, tag.LangID, l)
276 }
277 if sc, _ := getScriptID(script, b(tt.script)); sc != tag.ScriptID {
278 t.Errorf("%d: script was %q; want %q", i, tag.ScriptID, sc)
279 }
280 if r, _ := getRegionID(b(tt.region)); r != tag.RegionID {
281 t.Errorf("%d: region was %q; want %q", i, tag.RegionID, r)
282 }
283 if tag.str == "" {
284 return
285 }
286 p := int(tag.pVariant)
287 if p < int(tag.pExt) {
288 p++
289 }
290 if s, g := tag.str[p:tag.pExt], tt.variants; s != g {
291 t.Errorf("%d: variants was %q; want %q", i, s, g)
292 }
293 p = int(tag.pExt)
294 if p > 0 && p < len(tag.str) {
295 p++
296 }
297 if s, g := (tag.str)[p:], tt.ext; s != g {
298 t.Errorf("%d: extensions were %q; want %q", i, s, g)
299 }
300 })
301 }
302 }
303
304 func TestParseTag(t *testing.T) {
305 partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
306 if strings.HasPrefix(tt.in, "x-") || tt.rewrite {
307 return Tag{}, true
308 }
309 scan := makeScannerString(tt.in)
310 id, end := parseTag(&scan, true)
311 id.str = string(scan.b[:end])
312 tt.ext = ""
313 tt.extList = []string{}
314 return id, false
315 })
316 }
317
318 func TestParse(t *testing.T) {
319 partChecks(t, func(t *testing.T, tt *parseTest) (id Tag, skip bool) {
320 id, err := Parse(tt.in)
321 ext := ""
322 if id.str != "" {
323 if strings.HasPrefix(id.str, "x-") {
324 ext = id.str
325 } else if int(id.pExt) < len(id.str) && id.pExt > 0 {
326 ext = id.str[id.pExt+1:]
327 }
328 }
329 if tag, _ := Parse(id.String()); tag.String() != id.String() {
330 t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, id.String(), tag.String())
331 }
332 if ext != tt.ext {
333 t.Errorf("%d:%s: ext was %q; want %q", tt.i, tt.in, ext, tt.ext)
334 }
335 changed := id.str != "" && !strings.HasPrefix(tt.in, id.str)
336 if changed != tt.changed {
337 t.Errorf("%d:%s: changed was %v; want %v", tt.i, tt.in, changed, tt.changed)
338 }
339 if (err != nil) != tt.invalid {
340 t.Errorf("%d:%s: invalid was %v; want %v. Error: %v", tt.i, tt.in, err != nil, tt.invalid, err)
341 }
342 return id, false
343 })
344 }
345
346 func TestErrors(t *testing.T) {
347 mkInvalid := func(s string) error {
348 return NewValueError([]byte(s))
349 }
350 tests := []struct {
351 in string
352 out error
353 }{
354
355 {"ac", mkInvalid("ac")},
356 {"AC", mkInvalid("ac")},
357 {"aa-Uuuu", mkInvalid("Uuuu")},
358 {"aa-AB", mkInvalid("AB")},
359
360 {"ac-u", ErrSyntax},
361 {"ac-u-ca", mkInvalid("ac")},
362 {"ac-u-ca-co-pinyin", mkInvalid("ac")},
363 {"noob", ErrSyntax},
364 }
365 for _, tt := range tests {
366 _, err := Parse(tt.in)
367 if err != tt.out {
368 t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
369 }
370 }
371 }
372
View as plain text