1
2
3
4
5 package cases
6
7 import (
8 "bytes"
9 "fmt"
10 "path"
11 "strings"
12 "testing"
13 "unicode/utf8"
14
15 "golang.org/x/text/internal/testtext"
16 "golang.org/x/text/language"
17 "golang.org/x/text/transform"
18 "golang.org/x/text/unicode/norm"
19 )
20
21 type testCase struct {
22 lang string
23 src interface{}
24 title interface{}
25 lower interface{}
26 upper interface{}
27 opts options
28 }
29
// testCases is the table driving TestCaseMappings. Entries carry explicit
// indices, which match the index printed in that test's failure messages.
var testCases = []testCase{
	// Final sigma handling disabled: a Σ at the end of a word is expected to
	// lowercase to σ.
	0: {
		lang:  "und",
		src:   "abc aBc ABC abC İsıI ΕΣΆΣ",
		title: "Abc Abc Abc Abc İsıi Εσάσ",
		lower: "abc abc abc abc i\u0307sıi εσάσ",
		upper: "ABC ABC ABC ABC İSII ΕΣΆΣ",
		opts:  getOpts(HandleFinalSigma(false)),
	},

	// Final sigma handling enabled: a Σ ending a longer word is expected to
	// lowercase to ς, while a Σ that forms a word by itself (also after '_'
	// or '-') is expected to lowercase to σ.
	1: {
		lang:  "und",
		src:   "abc aBc ABC abC İsıI ΕΣΆΣ Σ _Σ -Σ",
		title: "Abc Abc Abc Abc İsıi Εσάς Σ _Σ -Σ",
		lower: "abc abc abc abc i\u0307sıi εσάς σ _σ -σ",
		upper: "ABC ABC ABC ABC İSII ΕΣΆΣ Σ _Σ -Σ",
		opts:  getOpts(HandleFinalSigma(true)),
	},

	// The leading digraph has three different expected forms for title, lower
	// and upper casing. supported is declared elsewhere in the package; it is
	// used here as a space-separated list of language tags.
	2: {
		lang:  supported,
		src:   "DžA",
		title: "Dža",
		lower: "dža",
		upper: "DŽA",
	},

	// Title casing only. src and title are []string, so items may contain
	// spaces and exercise word boundaries within a single input.
	3: {
		lang: supported,
		src: []string{
			"FOO CASE TEST",
			"DON'T DO THiS",
			"χωΡΊΣ χωΡΊΣ^a χωΡΊΣ:a χωΡΊΣ:^a χωΡΊΣ^ όμΩΣ Σ",
			"with-hyphens",
			"49ers 49ers",
			`"capitalize a^a -hyphen 0X _u a_u:a`,
			"MidNumLet a.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
			"MidNum a,b;c\u037ed\u0589e\u060cf\u2044g\ufe50h",
			"\u0345 x\u3031x x\u05d0x \u05d0x a'.a a.a a4,a",
		},
		title: []string{
			"Foo Case Test",
			"Don't Do This",
			"Χωρίς Χωρίσ^A Χωρίσ:a Χωρίσ:^A Χωρίς^ Όμως Σ",
			"With-Hyphens",
			// The first letter after the leading digits is the one expected
			// to be capitalized.
			"49Ers 49Ers",
			`"Capitalize A^A -Hyphen 0X _U A_u:a`,
			"Midnumlet A.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
			"Midnum A,B;C\u037eD\u0589E\u060cF\u2044G\ufe50H",
			"\u0399 X\u3031X X\u05d0x \u05d0X A'.A A.a A4,A",
		},
	},

	// NoLower: only the first letter of each word is expected to change; the
	// remaining letters keep the case they have in src.
	4: {
		lang:  "und",
		src:   "abc aBc ABC abC İsıI o'Brien",
		title: "Abc ABc ABC AbC İsıI O'Brien",
		opts:  getOpts(NoLower),
	},

	// Greek: the expected upper-case forms carry no accents (ΟΔΌΣ -> ΟΔΟΣ).
	5: {
		lang:  "el",
		src:   "aBc ΟΔΌΣ Οδός Σο ΣΟ Σ oΣ ΟΣ σ ἕξ \u03ac",
		title: "Abc Οδός Οδός Σο Σο Σ Oς Ος Σ Ἕξ \u0386",
		lower: "abc οδός οδός σο σο σ oς ος σ ἕξ \u03ac",
		upper: "ABC ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ Σ OΣ ΟΣ Σ ΕΞ \u0391",
	},

	// Turkish and Azeri: dotted and dotless i are kept apart, so I is
	// expected to lowercase to ı and i to uppercase to İ.
	6: {
		lang:  "tr az",
		src:   "Isiİ İsıI I\u0307sIiİ İsıI\u0307 I\u0300\u0307",
		title: "Isii İsıı I\u0307sıii İsıi I\u0300\u0307",
		lower: "ısii isıı isıii isıi \u0131\u0300\u0307",
		upper: "ISİİ İSII I\u0307SIİİ İSII\u0307 I\u0300\u0307",
	},

	// Lithuanian: the expected lower-case forms of I, J and Į with an accent
	// above contain an explicit dot above, which the expected upper-case
	// forms do not have.
	7: {
		lang:  "lt",
		src:   "I Ï J J̈ Į Į̈ Ì Í Ĩ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
		title: "I Ï J J̈ Į Į̈ Ì Í Ĩ Xi̇̈ Xj̇̈ Xį̇̈ Xi̇̀ Xi̇́ Xi̇̃ Xi Xi̇̈ Xj Xj̇̈ Xį Xį̇̈ Xi̟̤",
		lower: "i i̇̈ j j̇̈ į į̇̈ i̇̀ i̇́ i̇̃ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ xi xi̇̈ xj xj̇̈ xį xį̇̈ xi̟̤",
		upper: "I Ï J J̈ Į Į̈ Ì Í Ĩ XÏ XJ̈ XĮ̈ XÌ XÍ XĨ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
	},

	// Lithuanian again, with the dot above (U+0307) and the accents spelled
	// out as escapes.
	8: {
		lang:  "lt",
		src:   "\u012e\u0300 \u00cc i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
		title: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
		lower: "\u012f\u0307\u0300 i\u0307\u0300 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
		upper: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
	},

	// Dutch, title casing only: both letters of a leading "ij" are expected
	// to be capitalized, and a leading 'ns or 's stays in lower case.
	9: {
		lang:  "nl",
		src:   "ijs IJs Ij Ijs İJ İJs aa aA 'ns 'S",
		title: "IJs IJs IJ IJs İj İjs Aa Aa 'ns 's",
	},

	// Afrikaans: the word 'n is expected to stay in lower case when title
	// casing.
	10: {
		lang:  "af",
		src:   "wag 'n bietjie",
		title: "Wag 'n Bietjie",
		lower: "wag 'n bietjie",
		upper: "WAG 'N BIETJIE",
	},
}
155
156 func TestCaseMappings(t *testing.T) {
157 for i, tt := range testCases {
158 src, ok := tt.src.([]string)
159 if !ok {
160 src = strings.Split(tt.src.(string), " ")
161 }
162
163 for _, lang := range strings.Split(tt.lang, " ") {
164 tag := language.MustParse(lang)
165 testEntry := func(name string, mk func(language.Tag, options) transform.SpanningTransformer, gold interface{}) {
166 c := Caser{mk(tag, tt.opts)}
167 if gold != nil {
168 wants, ok := gold.([]string)
169 if !ok {
170 wants = strings.Split(gold.(string), " ")
171 }
172 for j, want := range wants {
173 if got := c.String(src[j]); got != want {
174 t.Errorf("%d:%s:\n%s.String(%+q):\ngot %+q;\nwant %+q", i, lang, name, src[j], got, want)
175 }
176 }
177 }
178 dst := make([]byte, 256)
179 src := []byte(strings.Join(src, " "))
180 v := testtext.AllocsPerRun(20, func() {
181 c.Transform(dst, src, true)
182 })
183 if v > 1.1 {
184 t.Errorf("%d:%s:\n%s: number of allocs was %f; want 0", i, lang, name, v)
185 }
186 }
187 testEntry("Upper", makeUpper, tt.upper)
188 testEntry("Lower", makeLower, tt.lower)
189 testEntry("Title", makeTitle, tt.title)
190 }
191 }
192 }
193
194
195 func TestAlloc(t *testing.T) {
196 dst := make([]byte, 256)
197 src := []byte(txtNonASCII)
198
199 for i, f := range []func() Caser{
200 func() Caser { return Upper(language.Und) },
201 func() Caser { return Lower(language.Und) },
202 func() Caser { return Lower(language.Und, HandleFinalSigma(false)) },
203
204
205
206
207 } {
208 testtext.Run(t, "", func(t *testing.T) {
209 var c Caser
210 v := testtext.AllocsPerRun(10, func() {
211 c = f()
212 })
213 if v > 0 {
214
215
216 t.Errorf("%d:init: number of allocs was %f; want 0", i, v)
217 }
218 v = testtext.AllocsPerRun(2, func() {
219 c.Transform(dst, src, true)
220 })
221 if v > 0 {
222 t.Errorf("%d:transform: number of allocs was %f; want 0", i, v)
223 }
224 })
225 }
226 }
227
228 func testHandover(t *testing.T, c Caser, src string) {
229 want := c.String(src)
230
231 pSrc := 0
232 for ; pSrc < len(src) && pSrc < len(want) && want[pSrc] == src[pSrc]; pSrc++ {
233 }
234
235
236 for i := 0; i < pSrc; i++ {
237 testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) {
238 dst := make([]byte, 4*len(src))
239 c.Reset()
240 nSpan, _ := c.Span([]byte(src[:i]), false)
241 copy(dst, src[:nSpan])
242 nTransform, _, _ := c.Transform(dst[nSpan:], []byte(src[nSpan:]), true)
243 got := string(dst[:nSpan+nTransform])
244 if got != want {
245 t.Errorf("full string: got %q; want %q", got, want)
246 }
247 })
248 }
249 }
250
251 func TestHandover(t *testing.T) {
252 testCases := []struct {
253 desc string
254 t Caser
255 first, second string
256 }{{
257 "title/nosigma/single midword",
258 Title(language.Und, HandleFinalSigma(false)),
259 "A.", "a",
260 }, {
261 "title/nosigma/single midword",
262 Title(language.Und, HandleFinalSigma(false)),
263 "A", ".a",
264 }, {
265 "title/nosigma/double midword",
266 Title(language.Und, HandleFinalSigma(false)),
267 "A..", "a",
268 }, {
269 "title/nosigma/double midword",
270 Title(language.Und, HandleFinalSigma(false)),
271 "A.", ".a",
272 }, {
273 "title/nosigma/double midword",
274 Title(language.Und, HandleFinalSigma(false)),
275 "A", "..a",
276 }, {
277 "title/sigma/single midword",
278 Title(language.Und),
279 "ΟΣ.", "a",
280 }, {
281 "title/sigma/single midword",
282 Title(language.Und),
283 "ΟΣ", ".a",
284 }, {
285 "title/sigma/double midword",
286 Title(language.Und),
287 "ΟΣ..", "a",
288 }, {
289 "title/sigma/double midword",
290 Title(language.Und),
291 "ΟΣ.", ".a",
292 }, {
293 "title/sigma/double midword",
294 Title(language.Und),
295 "ΟΣ", "..a",
296 }, {
297 "title/af/leading apostrophe",
298 Title(language.Afrikaans),
299 "'", "n bietje",
300 }}
301 for _, tc := range testCases {
302 testtext.Run(t, tc.desc, func(t *testing.T) {
303 src := tc.first + tc.second
304 want := tc.t.String(src)
305 tc.t.Reset()
306 n, _ := tc.t.Span([]byte(tc.first), false)
307
308 dst := make([]byte, len(want))
309 copy(dst, tc.first[:n])
310
311 nDst, _, _ := tc.t.Transform(dst[n:], []byte(src[n:]), true)
312 got := string(dst[:n+nDst])
313 if got != want {
314 t.Errorf("got %q; want %q", got, want)
315 }
316 })
317 }
318 }
319
320
321
// minBufSize equals norm.MaxSegmentSize. The bufferTests entries use it as
// dstSize or srcSize for the side that does not get an explicitly chosen size.
const minBufSize = norm.MaxSegmentSize
323
324 type bufferTest struct {
325 desc, src, want string
326 firstErr error
327 dstSize, srcSize int
328 t transform.SpanningTransformer
329 }
330
// bufferTests holds the scenarios for TestShortBuffersAndOverflow. It is
// filled in by init.
var bufferTests []bufferTest
332
// init populates bufferTests.
func init() {
	bufferTests = []bufferTest{{
		// Root locale, upper and lower casing with constrained buffers.
		desc:     "und/upper/short dst",
		src:      "abcdefg",
		want:     "ABCDEFG",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Upper(language.Und),
	}, {
		// The fourth and fifth byte of src form a two-byte rune, so a
		// four-byte chunk ends in the middle of it.
		desc:     "und/upper/short src",
		src:      "123é56",
		want:     "123É56",
		firstErr: transform.ErrShortSrc,
		dstSize:  4,
		srcSize:  4,
		t:        Upper(language.Und),
	}, {
		desc:     "und/upper/no error on short",
		src:      "12",
		want:     "12",
		firstErr: nil,
		dstSize:  1,
		srcSize:  1,
		t:        Upper(language.Und),
	}, {
		desc:     "und/lower/short dst",
		src:      "ABCDEFG",
		want:     "abcdefg",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Lower(language.Und),
	}, {
		desc:     "und/lower/short src",
		src:      "123É56",
		want:     "123é56",
		firstErr: transform.ErrShortSrc,
		dstSize:  4,
		srcSize:  4,
		t:        Lower(language.Und),
	}, {
		desc:     "und/lower/no error on short",
		src:      "12",
		want:     "12",
		firstErr: nil,
		dstSize:  1,
		srcSize:  1,
		t:        Lower(language.Und),
	}, {
		// Without final sigma handling every Σ is expected to become σ.
		desc:    "und/lower/simple (no final sigma)",
		src:     "ΟΣ ΟΣΣ",
		want:    "οσ οσσ",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Lower(language.Und, HandleFinalSigma(false)),
	}, {
		desc:    "und/title/simple (no final sigma)",
		src:     "ΟΣ ΟΣΣ",
		want:    "Οσ Οσσ",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und, HandleFinalSigma(false)),
	}, {
		// With final sigma handling a Σ at the end of a word is expected to
		// become ς.
		desc:    "und/title/final sigma: no error",
		src:     "ΟΣ",
		want:    "Ος",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short source",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortSrc,
		dstSize:  minBufSize,
		srcSize:  10,
		t:        Title(language.Und),
	}, {
		// The next three entries differ only in the destination size.
		desc:     "und/title/final sigma: short destination 1",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortDst,
		dstSize:  10,
		srcSize:  minBufSize,
		t:        Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short destination 2",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortDst,
		dstSize:  9,
		srcSize:  minBufSize,
		t:        Title(language.Und),
	}, {
		desc:     "und/title/final sigma: short destination 3",
		src:      "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
		want:     "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
		firstErr: transform.ErrShortDst,
		dstSize:  8,
		srcSize:  minBufSize,
		t:        Title(language.Und),
	}, {
		// Every σ takes two bytes, so a five-byte chunk ends in the middle of
		// a rune.
		desc:     "und/title/clipped UTF-8 rune",
		src:      "σσσσσσσσσσσ",
		want:     "Σσσσσσσσσσσ",
		firstErr: transform.ErrShortSrc,
		dstSize:  minBufSize,
		srcSize:  5,
		t:        Title(language.Und),
	}, {
		// src ends in a lone 0xCF byte; it is expected to be passed through
		// unchanged.
		desc:    "und/title/clipped UTF-8 rune atEOF",
		src:     "σσσ" + string([]byte{0xCF}),
		want:    "Σσσ" + string([]byte{0xCF}),
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		// With exactly maxIgnorable '.' characters between the Σ and the next
		// letter, the Σ is still expected to become a non-final σ.
		desc:    "und/title/final sigma: max ignorables",
		src:     "ΟΣ" + strings.Repeat(".", maxIgnorable) + "a",
		want:    "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		// srcSize is chosen so that the first chunk ends right after the run
		// of maxIgnorable+1 '.' characters.
		desc:    "und/title/long string",
		src:     "AA" + strings.Repeat(".", maxIgnorable+1) + "a",
		want:    "Aa" + strings.Repeat(".", maxIgnorable+1) + "A",
		dstSize: minBufSize,
		srcSize: len("AA" + strings.Repeat(".", maxIgnorable+1)),
		t:       Title(language.Und),
	}, {
		// With maxIgnorable+1 '.' characters after the Σ, the Σ is expected
		// to become a final ς even though a letter follows.
		desc:    "und/title/final sigma: too many ignorables",
		src:     "ΟΣ" + strings.Repeat(".", maxIgnorable+1) + "a",
		want:    "Ος" + strings.Repeat(".", maxIgnorable+1) + "A",
		dstSize: minBufSize,
		srcSize: len("ΟΣ" + strings.Repeat(".", maxIgnorable+1)),
		t:       Title(language.Und),
	}, {
		desc:    "und/title/final sigma: apostrophe",
		src:     "ΟΣ''a",
		want:    "Οσ''A",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Title(language.Und),
	}, {
		// Greek upper casing: the trailing U+0313 is expected to be dropped
		// when it comes after maxIgnorable-1 other marks ...
		desc:    "el/upper/max ignorables",
		src:     "ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
		want:    "Ο" + strings.Repeat("\u0321", maxIgnorable-1),
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Upper(language.Greek),
	}, {
		// ... and to be kept when it comes after maxIgnorable marks.
		desc:    "el/upper/too many ignorables",
		src:     "ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
		want:    "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
		dstSize: minBufSize,
		srcSize: len("ο" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Upper(language.Greek),
	}, {
		desc:     "el/upper/short dst",
		src:      "123ο",
		want:     "123Ο",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Upper(language.Greek),
	}, {
		// Lithuanian lower casing: U+0307 is expected to be inserted before
		// the U+0300 when that accent follows maxIgnorable-1 other marks ...
		desc:    "lt/lower/max ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		want:    "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Lower(language.Lithuanian),
	}, {
		// ... and not when it follows maxIgnorable marks.
		desc:    "lt/lower/too many ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
		want:    "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
		dstSize: minBufSize,
		srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Lower(language.Lithuanian),
	}, {
		// U+00CC is expected to expand to three code points, more bytes than
		// fit in what is left of the five-byte destination.
		desc:     "lt/lower/decomposition with short dst buffer 1",
		src:      "aaaaa\u00cc",
		firstErr: transform.ErrShortDst,
		want:     "aaaaai\u0307\u0300",
		dstSize:  5,
		srcSize:  minBufSize,
		t:        Lower(language.Lithuanian),
	}, {
		desc:     "lt/lower/decomposition with short dst buffer 2",
		src:      "aaaa\u00cc",
		firstErr: transform.ErrShortDst,
		want:     "aaaai\u0307\u0300",
		dstSize:  5,
		srcSize:  minBufSize,
		t:        Lower(language.Lithuanian),
	}, {
		// Lithuanian upper casing: the U+0307 is expected to be removed when
		// it follows maxIgnorable-1 other marks ...
		desc:    "lt/upper/max ignorables",
		src:     "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		want:    "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Upper(language.Lithuanian),
	}, {
		// ... and to be kept when it follows maxIgnorable marks.
		desc:    "lt/upper/too many ignorables",
		src:     "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		want:    "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		dstSize: minBufSize,
		srcSize: len("i" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Upper(language.Lithuanian),
	}, {
		desc:     "lt/upper/short dst",
		src:      "12i\u0307\u0300",
		want:     "12\u00cc",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Upper(language.Lithuanian),
	}, {
		// Turkish lower casing: I is expected to become i, with the U+0307
		// removed, when the U+0307 follows maxIgnorable-1 other marks ...
		desc:    "aztr/lower/max ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		want:    "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		dstSize: minBufSize,
		srcSize: minBufSize,
		t:       Lower(language.Turkish),
	}, {
		// ... and to become dotless ı, with the U+0307 kept, when it follows
		// maxIgnorable marks.
		desc:    "aztr/lower/too many ignorables",
		src:     "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		want:    "\u0131" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
		dstSize: minBufSize,
		srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
		t:       Lower(language.Turkish),
	}, {
		// Dutch title casing with a destination shorter than the three-byte
		// result.
		desc:     "nl/title/pre-IJ cutoff",
		src:      " ij",
		want:     " IJ",
		firstErr: transform.ErrShortDst,
		dstSize:  2,
		srcSize:  minBufSize,
		t:        Title(language.Dutch),
	}, {
		// Here the destination is exactly as long as the result, yet the
		// first call is still expected to report ErrShortDst.
		desc:     "nl/title/mid-IJ cutoff",
		src:      " ij",
		want:     " IJ",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Title(language.Dutch),
	}, {
		desc:     "af/title/apostrophe",
		src:      "'n bietje",
		want:     "'n Bietje",
		firstErr: transform.ErrShortDst,
		dstSize:  3,
		srcSize:  minBufSize,
		t:        Title(language.Afrikaans),
	}}
}
601
602 func TestShortBuffersAndOverflow(t *testing.T) {
603 for i, tt := range bufferTests {
604 testtext.Run(t, tt.desc, func(t *testing.T) {
605 buf := make([]byte, tt.dstSize)
606 got := []byte{}
607 var nSrc, nDst int
608 var err error
609 for p := 0; p < len(tt.src); p += nSrc {
610 q := p + tt.srcSize
611 if q > len(tt.src) {
612 q = len(tt.src)
613 }
614 nDst, nSrc, err = tt.t.Transform(buf, []byte(tt.src[p:q]), q == len(tt.src))
615 got = append(got, buf[:nDst]...)
616
617 if p == 0 && err != tt.firstErr {
618 t.Errorf("%d:%s:\n error was %v; want %v", i, tt.desc, err, tt.firstErr)
619 break
620 }
621 }
622 if string(got) != tt.want {
623 t.Errorf("%d:%s:\ngot %+q;\nwant %+q", i, tt.desc, got, tt.want)
624 }
625 testHandover(t, Caser{tt.t}, tt.src)
626 })
627 }
628 }
629
630 func TestSpan(t *testing.T) {
631 for _, tt := range []struct {
632 desc string
633 src string
634 want string
635 atEOF bool
636 err error
637 t Caser
638 }{{
639 desc: "und/upper/basic",
640 src: "abcdefg",
641 want: "",
642 atEOF: true,
643 err: transform.ErrEndOfSpan,
644 t: Upper(language.Und),
645 }, {
646 desc: "und/upper/short src",
647 src: "123É"[:4],
648 want: "123",
649 atEOF: false,
650 err: transform.ErrShortSrc,
651 t: Upper(language.Und),
652 }, {
653 desc: "und/upper/no error on short",
654 src: "12",
655 want: "12",
656 atEOF: false,
657 t: Upper(language.Und),
658 }, {
659 desc: "und/lower/basic",
660 src: "ABCDEFG",
661 want: "",
662 atEOF: true,
663 err: transform.ErrEndOfSpan,
664 t: Lower(language.Und),
665 }, {
666 desc: "und/lower/short src num",
667 src: "123é"[:4],
668 want: "123",
669 atEOF: false,
670 err: transform.ErrShortSrc,
671 t: Lower(language.Und),
672 }, {
673 desc: "und/lower/short src greek",
674 src: "αβγé"[:7],
675 want: "αβγ",
676 atEOF: false,
677 err: transform.ErrShortSrc,
678 t: Lower(language.Und),
679 }, {
680 desc: "und/lower/no error on short",
681 src: "12",
682 want: "12",
683 atEOF: false,
684 t: Lower(language.Und),
685 }, {
686 desc: "und/lower/simple (no final sigma)",
687 src: "ος οσσ",
688 want: "οσ οσσ",
689 atEOF: true,
690 t: Lower(language.Und, HandleFinalSigma(false)),
691 }, {
692 desc: "und/title/simple (no final sigma)",
693 src: "Οσ Οσσ",
694 want: "Οσ Οσσ",
695 atEOF: true,
696 t: Title(language.Und, HandleFinalSigma(false)),
697 }, {
698 desc: "und/lower/final sigma: no error",
699 src: "οΣ",
700 want: "ο",
701 err: transform.ErrEndOfSpan,
702 t: Lower(language.Und),
703 }, {
704 desc: "und/title/final sigma: no error",
705 src: "ΟΣ",
706 want: "Ο",
707 err: transform.ErrEndOfSpan,
708 t: Title(language.Und),
709 }, {
710 desc: "und/title/final sigma: no short source!",
711 src: "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ",
712 want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ",
713 err: transform.ErrEndOfSpan,
714 t: Title(language.Und),
715 }, {
716 desc: "und/title/clipped UTF-8 rune",
717 src: "Σσ" + string([]byte{0xCF}),
718 want: "Σσ",
719 atEOF: false,
720 err: transform.ErrShortSrc,
721 t: Title(language.Und),
722 }, {
723 desc: "und/title/clipped UTF-8 rune atEOF",
724 src: "Σσσ" + string([]byte{0xCF}),
725 want: "Σσσ" + string([]byte{0xCF}),
726 atEOF: true,
727 t: Title(language.Und),
728 }, {
729
730
731
732 desc: "und/title/long string",
733 src: "A" + strings.Repeat("a", maxIgnorable+5),
734 want: "A" + strings.Repeat("a", maxIgnorable+5),
735 t: Title(language.Und),
736 }, {
737
738
739
740 desc: "und/title/cyrillic",
741 src: "При",
742 want: "При",
743 atEOF: true,
744 t: Title(language.Und, HandleFinalSigma(false)),
745 }, {
746
747
748
749 desc: "und/title/final sigma: max ignorables",
750 src: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
751 want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
752 t: Title(language.Und),
753 }, {
754 desc: "el/upper/max ignorables - not implemented",
755 src: "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
756 want: "",
757 err: transform.ErrEndOfSpan,
758 t: Upper(language.Greek),
759 }, {
760 desc: "el/upper/too many ignorables - not implemented",
761 src: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
762 want: "",
763 err: transform.ErrEndOfSpan,
764 t: Upper(language.Greek),
765 }, {
766 desc: "el/upper/short dst",
767 src: "123ο",
768 want: "",
769 err: transform.ErrEndOfSpan,
770 t: Upper(language.Greek),
771 }, {
772 desc: "lt/lower/max ignorables",
773 src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
774 want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
775 t: Lower(language.Lithuanian),
776 }, {
777 desc: "lt/lower/isLower",
778 src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
779 want: "",
780 err: transform.ErrEndOfSpan,
781 t: Lower(language.Lithuanian),
782 }, {
783 desc: "lt/lower/not identical",
784 src: "aaaaa\u00cc",
785 err: transform.ErrEndOfSpan,
786 want: "aaaaa",
787 t: Lower(language.Lithuanian),
788 }, {
789 desc: "lt/lower/identical",
790 src: "aaaai\u0307\u0300",
791 want: "aaaai\u0307\u0300",
792 t: Lower(language.Lithuanian),
793 }, {
794 desc: "lt/upper/not implemented",
795 src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
796 want: "",
797 err: transform.ErrEndOfSpan,
798 t: Upper(language.Lithuanian),
799 }, {
800 desc: "lt/upper/not implemented, ascii",
801 src: "AB",
802 want: "",
803 err: transform.ErrEndOfSpan,
804 t: Upper(language.Lithuanian),
805 }, {
806 desc: "nl/title/pre-IJ cutoff",
807 src: " IJ",
808 want: " IJ",
809 t: Title(language.Dutch),
810 }, {
811 desc: "nl/title/mid-IJ cutoff",
812 src: " Ia",
813 want: " Ia",
814 t: Title(language.Dutch),
815 }, {
816 desc: "af/title/apostrophe",
817 src: "'n Bietje",
818 want: "'n Bietje",
819 t: Title(language.Afrikaans),
820 }, {
821 desc: "af/title/apostrophe-incorrect",
822 src: "'N Bietje",
823
824
825
826 want: "",
827 err: transform.ErrEndOfSpan,
828 t: Title(language.Afrikaans),
829 }} {
830 testtext.Run(t, tt.desc, func(t *testing.T) {
831 for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) {
832 tt.t.Reset()
833 n, err := tt.t.Span([]byte(tt.src[:p]), false)
834 if err != nil && err != transform.ErrShortSrc {
835 t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want))
836 break
837 }
838 }
839 tt.t.Reset()
840 n, err := tt.t.Span([]byte(tt.src), tt.atEOF)
841 if n != len(tt.want) || err != tt.err {
842 t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err)
843 }
844 testHandover(t, tt.t, tt.src)
845 })
846 }
847 }
848
// txtASCII is an ASCII-only sample: one English sentence repeated 50 times.
var txtASCII = strings.Repeat("The quick brown fox jumps over the lazy dog. ", 50)

// txt_vn is a sample of Vietnamese text.
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả. Nếu bạn sử
dụng, chuyển đổi, hoặc xây dựng dự án từ nội dung được chia sẻ này, bạn phải áp
dụng giấy phép này hoặc một giấy phép khác có các điều khoản tương tự như giấy
phép này cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào trên đây
cũng có thể được miễn bỏ nếu bạn được sự cho phép của người sở hữu bản quyền.
Phạm vi công chúng — Khi tác phẩm hoặc bất kỳ chương nào của tác phẩm đã trong
vùng dành cho công chúng theo quy định của pháp luật thì tình trạng của nó không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`

// txt_cn is a sample of Chinese text.
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件:
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`

// txt_ru is a sample of Russian text. It contains one Latin-script word and
// ends with a short Greek phrase.
const txt_ru = `При обязательном соблюдении следующих условий: Attribution — Вы
должны атрибутировать произведение (указывать автора и источник) в порядке,
предусмотренном автором или лицензиаром (но только так, чтобы никоим образом не
подразумевалось, что они поддерживают вас или использование вами данного
произведения). Υπό τις ακόλουθες προϋποθέσεις:`

// txt_gr is a sample of Greek text.
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με
τον τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια (χωρίς
όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή τη χρήση του έργου
από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε, τροποποιήσετε ή δημιουργήσετε
περαιτέρω βασισμένοι στο έργο θα μπορείτε να διανέμετε το έργο που θα προκύψει
μόνο με την ίδια ή παρόμοια άδεια.`

// txtNonASCII is the concatenation of the four samples above, without any
// separator between them.
const txtNonASCII = txt_vn + txt_cn + txt_ru + txt_gr
885
886
887
888 func BenchmarkCasers(b *testing.B) {
889 for _, s := range []struct{ name, text string }{
890 {"ascii", txtASCII},
891 {"nonASCII", txtNonASCII},
892 {"short", "При"},
893 } {
894 src := []byte(s.text)
895
896 for _, f := range []struct {
897 name string
898 fn func(b []byte) []byte
899 }{
900 {"lower", bytes.ToLower},
901 {"title", bytes.ToTitle},
902 {"upper", bytes.ToUpper},
903 } {
904 testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) {
905 b.SetBytes(int64(len(src)))
906 for i := 0; i < b.N; i++ {
907 f.fn(src)
908 }
909 })
910 }
911 for _, t := range []struct {
912 name string
913 caser transform.SpanningTransformer
914 }{
915 {"fold/default", Fold()},
916 {"upper/default", Upper(language.Und)},
917 {"lower/sigma", Lower(language.Und)},
918 {"lower/simple", Lower(language.Und, HandleFinalSigma(false))},
919 {"title/sigma", Title(language.Und)},
920 {"title/simple", Title(language.Und, HandleFinalSigma(false))},
921 } {
922 c := Caser{t.caser}
923 dst := make([]byte, len(src))
924 testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) {
925 b.SetBytes(int64(len(src)))
926 for i := 0; i < b.N; i++ {
927 c.Reset()
928 c.Transform(dst, src, true)
929 }
930 })
931
932
933 if strings.HasSuffix(t.name, "/simple") {
934 continue
935 }
936 spanSrc := c.Bytes(src)
937 testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) {
938 c.Reset()
939 if n, _ := c.Span(spanSrc, true); n < len(spanSrc) {
940 b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n)
941 }
942 b.SetBytes(int64(len(spanSrc)))
943 for i := 0; i < b.N; i++ {
944 c.Reset()
945 c.Span(spanSrc, true)
946 }
947 })
948 }
949 }
950 }
951
View as plain text