1
2
3
4
5 package language
6
7 import (
8 "strings"
9 "testing"
10
11 "golang.org/x/text/internal/language"
12 )
13
14
15 func (t Tag) equalTags(a Tag) bool {
16 return t.lang() == a.lang() &&
17 t.script() == a.script() &&
18 t.region() == a.region()
19 }
20
21 var errSyntax = language.ErrSyntax
22
// parseTest is one row of the table returned by parseTests: an input string
// together with the parts the resulting tag is expected to have.
type parseTest struct {
	// i is the row's index in the table; parseTests fills it in.
	i int
	// in is the string handed to the parser.
	in string

	// Expected language, script and region subtags.
	lang   string
	script string
	region string

	// variants is the expected variant string, without a leading separator.
	variants string
	// ext is the expected extensions joined with "-".
	ext string
	// extList holds the expected extensions individually. parseTests keeps
	// ext and extList consistent with each other.
	extList []string

	// NOTE(review): the three flags below are set by the table but are not
	// read by any code visible in this file; their exact meaning should be
	// confirmed against their consumers.
	invalid bool
	rewrite bool
	changed bool
}
33
34 func parseTests() []parseTest {
35 tests := []parseTest{
36 {in: "root", lang: "und"},
37 {in: "und", lang: "und"},
38 {in: "en", lang: "en"},
39
40 {in: "en-US-u-va-posix", lang: "en", region: "US", ext: "u-va-posix"},
41 {in: "ca-ES-valencia", lang: "ca", region: "ES", variants: "valencia"},
42 {in: "en-US-u-rg-gbzzzz", lang: "en", region: "US", ext: "u-rg-gbzzzz"},
43
44 {in: "xy", lang: "und", invalid: true},
45 {in: "en-ZY", lang: "en", invalid: true},
46 {in: "gsw", lang: "gsw"},
47 {in: "sr_Latn", lang: "sr", script: "Latn"},
48 {in: "af-Arab", lang: "af", script: "Arab"},
49 {in: "nl-BE", lang: "nl", region: "BE"},
50 {in: "es-419", lang: "es", region: "419"},
51 {in: "und-001", lang: "und", region: "001"},
52 {in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
53
54 {in: "de-1901", lang: "de", variants: "1901"},
55
56 {in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
57
58 {in: "sl-rozaj", lang: "sl", variants: "rozaj"},
59 {in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
60 {in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
61 {in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
62 {in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
63
64 {in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
65
66
67 {in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
68 {in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
69 {in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
70
71
72 {in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
73
74
75 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
76 {in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
77 {in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
78 {in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
79
80
81 {in: "de-1902", lang: "de", variants: "", invalid: true},
82
83 {in: "EN_CYRL", lang: "en", script: "Cyrl"},
84
85 {in: "x-a-b-c-d", ext: "x-a-b-c-d"},
86 {in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
87 {in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
88 {in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
89 {in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
90 {in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
91 {in: "en-v-c", lang: "en", ext: "", invalid: true},
92 {in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
93 {in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
94 {in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
95 {in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
96 {in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
97 {in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
98 {in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
99 {in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
100 {in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
101 {in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
102 {in: "en-u-c", lang: "en", ext: "", invalid: true},
103 {in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
104 {in: "en-u-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk", invalid: true},
105 {in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
106 {in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
107 {in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
108 {in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-ca-co-phonebk-nu-arabic", invalid: true, changed: true},
109 {in: "en-u-co-phonebook", lang: "en", ext: "u-co", invalid: true},
110 {in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-co-cu-xau", invalid: true, changed: true},
111 {in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
112 {in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
113 {in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
114 {in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
115 {in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
116 {in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
117 {in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
118 {in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
119
120 {in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk-cu"}, invalid: true, changed: true},
121 {in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-co-cu-xau"}, invalid: true},
122
123
124 {in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau", changed: true},
125 {in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
126 {in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
127 {in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
128
129 {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
130 {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
131 {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
132 {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true},
133 {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false},
134 {in: "fr-est", lang: "et", changed: true},
135 {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true},
136 {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true},
137
138 {in: "", lang: "und", invalid: true},
139 {in: "-", lang: "und", invalid: true},
140 {in: "x", lang: "und", invalid: true},
141 {in: "x-", lang: "und", invalid: true},
142 {in: "x--", lang: "und", invalid: true},
143 {in: "a-a-b-c-d", lang: "und", invalid: true},
144 {in: "en-", lang: "en", invalid: true},
145 {in: "enne-", lang: "und", invalid: true},
146 {in: "en.", lang: "und", invalid: true},
147 {in: "en.-latn", lang: "und", invalid: true},
148 {in: "en.-en", lang: "en", invalid: true},
149 {in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
150 {in: "a-tooManyChars-c-d", lang: "und", invalid: true},
151
152
153 {in: "en-t-abcd", lang: "en", invalid: true},
154 {in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
155
156 {in: "zh-min-nan", lang: "nan"},
157 {in: "zh-yue", lang: "yue"},
158 {in: "zh-xiang", lang: "hsn", rewrite: true},
159 {in: "zh-guoyu", lang: "cmn", rewrite: true},
160 {in: "iw", lang: "iw"},
161 {in: "sgn-BE-FR", lang: "sfb", rewrite: true},
162 {in: "i-klingon", lang: "tlh", rewrite: true},
163 }
164 for i, tt := range tests {
165 tests[i].i = i
166 if tt.extList != nil {
167 tests[i].ext = strings.Join(tt.extList, "-")
168 }
169 if tt.ext != "" && tt.extList == nil {
170 tests[i].extList = []string{tt.ext}
171 }
172 }
173 return tests
174 }
175
176
177 func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
178 for i, tt := range parseTests() {
179 tag, skip := f(&tt)
180 if skip {
181 continue
182 }
183 if l, _ := language.ParseBase(tt.lang); l != tag.lang() {
184 t.Errorf("%d: lang was %q; want %q", i, tag.lang(), l)
185 }
186 if sc, _ := language.ParseScript(tt.script); sc != tag.script() {
187 t.Errorf("%d: script was %q; want %q", i, tag.script(), sc)
188 }
189 if r, _ := language.ParseRegion(tt.region); r != tag.region() {
190 t.Errorf("%d: region was %q; want %q", i, tag.region(), r)
191 }
192 v := tag.tag().Variants()
193 if v != "" {
194 v = v[1:]
195 }
196 if v != tt.variants {
197 t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
198 }
199 if e := strings.Join(tag.tag().Extensions(), "-"); e != tt.ext {
200 t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
201 }
202 }
203 }
204
205 func TestParse(t *testing.T) {
206 partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
207 id, _ = Raw.Parse(tt.in)
208 return id, false
209 })
210 }
211
212 func TestErrors(t *testing.T) {
213 mkInvalid := func(s string) error {
214 return language.NewValueError([]byte(s))
215 }
216 tests := []struct {
217 in string
218 out error
219 }{
220
221 {"ac", mkInvalid("ac")},
222 {"AC", mkInvalid("ac")},
223 {"aa-Uuuu", mkInvalid("Uuuu")},
224 {"aa-AB", mkInvalid("AB")},
225
226 {"ac-u", errSyntax},
227 {"ac-u-ca", mkInvalid("ac")},
228 {"ac-u-ca-co-pinyin", mkInvalid("ac")},
229 {"noob", errSyntax},
230 }
231 for _, tt := range tests {
232 _, err := Parse(tt.in)
233 if err != tt.out {
234 t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
235 }
236 }
237 }
238
239 func TestCompose1(t *testing.T) {
240 partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
241 l, _ := ParseBase(tt.lang)
242 s, _ := ParseScript(tt.script)
243 r, _ := ParseRegion(tt.region)
244 v := []Variant{}
245 for _, x := range strings.Split(tt.variants, "-") {
246 p, _ := ParseVariant(x)
247 v = append(v, p)
248 }
249 e := []Extension{}
250 for _, x := range tt.extList {
251 p, _ := ParseExtension(x)
252 e = append(e, p)
253 }
254 id, _ = Raw.Compose(l, s, r, v, e)
255 return id, false
256 })
257 }
258
259 func TestCompose2(t *testing.T) {
260 partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
261 l, _ := ParseBase(tt.lang)
262 s, _ := ParseScript(tt.script)
263 r, _ := ParseRegion(tt.region)
264 p := []interface{}{l, s, r, s, r, l}
265 for _, x := range strings.Split(tt.variants, "-") {
266 if x != "" {
267 v, _ := ParseVariant(x)
268 p = append(p, v)
269 }
270 }
271 for _, x := range tt.extList {
272 e, _ := ParseExtension(x)
273 p = append(p, e)
274 }
275 id, _ = Raw.Compose(p...)
276 return id, false
277 })
278 }
279
280 func TestCompose3(t *testing.T) {
281 partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
282 id, _ = Raw.Parse(tt.in)
283 id, _ = Raw.Compose(id)
284 return id, false
285 })
286 }
287
288 func mk(s string) Tag {
289 return Raw.Make(s)
290 }
291
292 func TestParseAcceptLanguage(t *testing.T) {
293 type res struct {
294 t Tag
295 q float32
296 }
297 en := []res{{mk("en"), 1.0}}
298 tests := []struct {
299 out []res
300 in string
301 ok bool
302 }{
303 {en, "en", true},
304 {en, " en", true},
305 {en, "en ", true},
306 {en, " en ", true},
307 {en, "en,", true},
308 {en, ",en", true},
309 {en, ",,,en,,,", true},
310 {en, ",en;q=1", true},
311
312
313 {nil, "", true},
314 {[]res{{mk("aa"), 1}}, "aa;", true},
315
316
317 {nil, ";", false},
318 {nil, "$", false},
319 {nil, "e;", false},
320 {nil, "x;", false},
321 {nil, "x", false},
322 {nil, "ac", false},
323 {nil, "aa;q", false},
324 {nil, "aa;q=", false},
325 {nil, "aa;q=.", false},
326 {nil, "00-t-0o", false},
327
328
329 {
330 []res{{mk("en"), 0.1}},
331 " english ;q=.1",
332 true,
333 },
334 {
335 []res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
336 " italian, deutsch, french",
337 true,
338 },
339
340
341 {
342 []res{{mk("en"), 0.1}},
343 "en;q=.1",
344 true,
345 },
346 {
347 []res{{mk("mul"), 1.0}},
348 "*",
349 true,
350 },
351 {
352 []res{{mk("en"), 1.0}, {mk("de"), 1.0}},
353 "en,de",
354 true,
355 },
356 {
357 []res{{mk("en"), 1.0}, {mk("de"), .5}},
358 "en,de;q=0.5",
359 true,
360 },
361 {
362 []res{{mk("de"), 0.8}, {mk("en"), 0.5}},
363 " en ; q = 0.5 , , de;q=0.8",
364 true,
365 },
366 {
367 []res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
368 "en,de,fr,i-klingon",
369 true,
370 },
371
372 {
373 []res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
374 "en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
375 true,
376 },
377
378 {
379 []res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
380 "en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
381 true,
382 },
383 }
384 for i, tt := range tests {
385 tags, qs, e := ParseAcceptLanguage(tt.in)
386 if e == nil != tt.ok {
387 t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
388 }
389 for j, tag := range tags {
390 if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
391 t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
392 break
393 }
394 }
395 }
396 }
397
398 func TestParseAcceptLanguageTooBig(t *testing.T) {
399 s := strings.Repeat("en-x-a-", 333)
400 _, _, err := ParseAcceptLanguage(s)
401 if err != language.ErrSyntax {
402 t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, language.ErrSyntax)
403 }
404 s += "en-x-a"
405 _, _, err = ParseAcceptLanguage(s)
406 if err != errTagListTooLarge {
407 t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, errTagListTooLarge)
408 }
409 }
410
View as plain text