1
2
3
4
5 package runes
6
7 import (
8 "strings"
9 "testing"
10 "unicode/utf8"
11
12 "golang.org/x/text/internal/testtext"
13 "golang.org/x/text/transform"
14 )
15
16 type transformTest struct {
17 desc string
18 szDst int
19 atEOF bool
20 repl string
21 in string
22 out string
23 outFull string
24 err error
25 errSpan error
26 nSpan int
27
28 t transform.SpanningTransformer
29 }
30
// large is a buffer size, in bytes, used by the tests for destination buffers
// that must never be the limiting factor.
const large = 10240
32
33 func (tt *transformTest) check(t *testing.T, i int) {
34 if tt.t == nil {
35 return
36 }
37 dst := make([]byte, tt.szDst)
38 src := []byte(tt.in)
39 nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF)
40 if err != tt.err {
41 t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err)
42 }
43 if got := string(dst[:nDst]); got != tt.out {
44 t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out)
45 }
46
47
48
49 out := make([]byte, large)
50 n := copy(out, dst[:nDst])
51 nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true)
52 if got, want := string(out[:n+nDst]), tt.outFull; got != want {
53 t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want)
54 }
55
56 tt.t.Reset()
57 p := 0
58 for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ {
59 }
60 if tt.nSpan != 0 {
61 p = tt.nSpan
62 }
63 if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan {
64 t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan)
65 }
66 }
67
// idem is the identity mapping: it returns r unchanged.
func idem(r rune) rune { return r }
69
70 func TestMap(t *testing.T) {
71 runes := []rune{'a', 'ç', '中', '\U00012345', 'a'}
72
73 rotate := Map(func(r rune) rune {
74 for i, m := range runes {
75 if m == r {
76 return runes[i+1]
77 }
78 }
79 return r
80 })
81
82 for i, tt := range []transformTest{{
83 desc: "empty",
84 szDst: large,
85 atEOF: true,
86 in: "",
87 out: "",
88 outFull: "",
89 t: rotate,
90 }, {
91 desc: "no change",
92 szDst: 1,
93 atEOF: true,
94 in: "b",
95 out: "b",
96 outFull: "b",
97 t: rotate,
98 }, {
99 desc: "short dst",
100 szDst: 2,
101 atEOF: true,
102 in: "aaaa",
103 out: "ç",
104 outFull: "çççç",
105 err: transform.ErrShortDst,
106 errSpan: transform.ErrEndOfSpan,
107 t: rotate,
108 }, {
109 desc: "short dst ascii, no change",
110 szDst: 2,
111 atEOF: true,
112 in: "bbb",
113 out: "bb",
114 outFull: "bbb",
115 err: transform.ErrShortDst,
116 t: rotate,
117 }, {
118 desc: "short dst writing error",
119 szDst: 2,
120 atEOF: false,
121 in: "a\x80",
122 out: "ç",
123 outFull: "ç\ufffd",
124 err: transform.ErrShortDst,
125 errSpan: transform.ErrEndOfSpan,
126 t: rotate,
127 }, {
128 desc: "short dst writing incomplete rune",
129 szDst: 2,
130 atEOF: true,
131 in: "a\xc0",
132 out: "ç",
133 outFull: "ç\ufffd",
134 err: transform.ErrShortDst,
135 errSpan: transform.ErrEndOfSpan,
136 t: rotate,
137 }, {
138 desc: "short dst, longer",
139 szDst: 5,
140 atEOF: true,
141 in: "Hellø",
142 out: "Hell",
143 outFull: "Hellø",
144 err: transform.ErrShortDst,
145 t: rotate,
146 }, {
147 desc: "short dst, single",
148 szDst: 1,
149 atEOF: false,
150 in: "ø",
151 out: "",
152 outFull: "ø",
153 err: transform.ErrShortDst,
154 t: Map(idem),
155 }, {
156 desc: "short dst, longer, writing error",
157 szDst: 8,
158 atEOF: false,
159 in: "\x80Hello\x80",
160 out: "\ufffdHello",
161 outFull: "\ufffdHello\ufffd",
162 err: transform.ErrShortDst,
163 errSpan: transform.ErrEndOfSpan,
164 t: rotate,
165 }, {
166 desc: "short src",
167 szDst: 2,
168 atEOF: false,
169 in: "a\xc2",
170 out: "ç",
171 outFull: "ç\ufffd",
172 err: transform.ErrShortSrc,
173 errSpan: transform.ErrEndOfSpan,
174 t: rotate,
175 }, {
176 desc: "invalid input, atEOF",
177 szDst: large,
178 atEOF: true,
179 in: "\x80",
180 out: "\ufffd",
181 outFull: "\ufffd",
182 errSpan: transform.ErrEndOfSpan,
183 t: rotate,
184 }, {
185 desc: "invalid input, !atEOF",
186 szDst: large,
187 atEOF: false,
188 in: "\x80",
189 out: "\ufffd",
190 outFull: "\ufffd",
191 errSpan: transform.ErrEndOfSpan,
192 t: rotate,
193 }, {
194 desc: "incomplete rune !atEOF",
195 szDst: large,
196 atEOF: false,
197 in: "\xc2",
198 out: "",
199 outFull: "\ufffd",
200 err: transform.ErrShortSrc,
201 errSpan: transform.ErrShortSrc,
202 t: rotate,
203 }, {
204 desc: "invalid input, incomplete rune atEOF",
205 szDst: large,
206 atEOF: true,
207 in: "\xc2",
208 out: "\ufffd",
209 outFull: "\ufffd",
210 errSpan: transform.ErrEndOfSpan,
211 t: rotate,
212 }, {
213 desc: "misc correct",
214 szDst: large,
215 atEOF: true,
216 in: "a\U00012345 ç!",
217 out: "ça 中!",
218 outFull: "ça 中!",
219 errSpan: transform.ErrEndOfSpan,
220 t: rotate,
221 }, {
222 desc: "misc correct and invalid",
223 szDst: large,
224 atEOF: true,
225 in: "Hello\x80 w\x80orl\xc0d!\xc0",
226 out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
227 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
228 errSpan: transform.ErrEndOfSpan,
229 t: rotate,
230 }, {
231 desc: "misc correct and invalid, short src",
232 szDst: large,
233 atEOF: false,
234 in: "Hello\x80 w\x80orl\xc0d!\xc2",
235 out: "Hello\ufffd w\ufffdorl\ufffdd!",
236 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
237 err: transform.ErrShortSrc,
238 errSpan: transform.ErrEndOfSpan,
239 t: rotate,
240 }, {
241 desc: "misc correct and invalid, short src, replacing RuneError",
242 szDst: large,
243 atEOF: false,
244 in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
245 out: "Hel?lo? w?orl?d!",
246 outFull: "Hel?lo? w?orl?d!?",
247 errSpan: transform.ErrEndOfSpan,
248 err: transform.ErrShortSrc,
249 t: Map(func(r rune) rune {
250 if r == utf8.RuneError {
251 return '?'
252 }
253 return r
254 }),
255 }} {
256 tt.check(t, i)
257 }
258 }
259
260 func TestRemove(t *testing.T) {
261 remove := Remove(Predicate(func(r rune) bool {
262 return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r)
263 }))
264
265 for i, tt := range []transformTest{
266 0: {
267 szDst: large,
268 atEOF: true,
269 in: "",
270 out: "",
271 outFull: "",
272 t: remove,
273 },
274 1: {
275 szDst: 0,
276 atEOF: true,
277 in: "aaaa",
278 out: "",
279 outFull: "",
280 errSpan: transform.ErrEndOfSpan,
281 t: remove,
282 },
283 2: {
284 szDst: 1,
285 atEOF: true,
286 in: "aaaa",
287 out: "",
288 outFull: "",
289 errSpan: transform.ErrEndOfSpan,
290 t: remove,
291 },
292 3: {
293 szDst: 1,
294 atEOF: true,
295 in: "baaaa",
296 out: "b",
297 outFull: "b",
298 errSpan: transform.ErrEndOfSpan,
299 t: remove,
300 },
301 4: {
302 szDst: 2,
303 atEOF: true,
304 in: "açaaa",
305 out: "ç",
306 outFull: "ç",
307 errSpan: transform.ErrEndOfSpan,
308 t: remove,
309 },
310 5: {
311 szDst: 2,
312 atEOF: true,
313 in: "aaaç",
314 out: "ç",
315 outFull: "ç",
316 errSpan: transform.ErrEndOfSpan,
317 t: remove,
318 },
319 6: {
320 szDst: 2,
321 atEOF: false,
322 in: "a\x80",
323 out: "",
324 outFull: "\ufffd",
325 err: transform.ErrShortDst,
326 errSpan: transform.ErrEndOfSpan,
327 t: remove,
328 },
329 7: {
330 szDst: 1,
331 atEOF: true,
332 in: "a\xc0",
333 out: "",
334 outFull: "\ufffd",
335 err: transform.ErrShortDst,
336 errSpan: transform.ErrEndOfSpan,
337 t: remove,
338 },
339 8: {
340 szDst: 1,
341 atEOF: false,
342 in: "a\xc2",
343 out: "",
344 outFull: "\ufffd",
345 err: transform.ErrShortSrc,
346 errSpan: transform.ErrEndOfSpan,
347 t: remove,
348 },
349 9: {
350 szDst: large,
351 atEOF: true,
352 in: "\x80",
353 out: "\ufffd",
354 outFull: "\ufffd",
355 errSpan: transform.ErrEndOfSpan,
356 t: remove,
357 },
358 10: {
359 szDst: large,
360 atEOF: false,
361 in: "\x80",
362 out: "\ufffd",
363 outFull: "\ufffd",
364 errSpan: transform.ErrEndOfSpan,
365 t: remove,
366 },
367 11: {
368 szDst: large,
369 atEOF: true,
370 in: "\xc2",
371 out: "\ufffd",
372 outFull: "\ufffd",
373 errSpan: transform.ErrEndOfSpan,
374 t: remove,
375 },
376 12: {
377 szDst: large,
378 atEOF: false,
379 in: "\xc2",
380 out: "",
381 outFull: "\ufffd",
382 err: transform.ErrShortSrc,
383 errSpan: transform.ErrShortSrc,
384 t: remove,
385 },
386 13: {
387 szDst: large,
388 atEOF: true,
389 in: "Hello \U00012345world!",
390 out: "Hll wrld!",
391 outFull: "Hll wrld!",
392 errSpan: transform.ErrEndOfSpan,
393 t: remove,
394 },
395 14: {
396 szDst: large,
397 atEOF: true,
398 in: "Hello\x80 w\x80orl\xc0d!\xc0",
399 out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
400 outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
401 errSpan: transform.ErrEndOfSpan,
402 t: remove,
403 },
404 15: {
405 szDst: large,
406 atEOF: false,
407 in: "Hello\x80 w\x80orl\xc0d!\xc2",
408 out: "Hll\ufffd w\ufffdrl\ufffdd!",
409 outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
410 err: transform.ErrShortSrc,
411 errSpan: transform.ErrEndOfSpan,
412 t: remove,
413 },
414 16: {
415 szDst: large,
416 atEOF: false,
417 in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
418 out: "Hello world!",
419 outFull: "Hello world!",
420 err: transform.ErrShortSrc,
421 errSpan: transform.ErrEndOfSpan,
422 t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })),
423 },
424 17: {
425 szDst: 4,
426 atEOF: true,
427 in: "Hellø",
428 out: "Hll",
429 outFull: "Hllø",
430 err: transform.ErrShortDst,
431 errSpan: transform.ErrEndOfSpan,
432 t: remove,
433 },
434 18: {
435 szDst: 4,
436 atEOF: false,
437 in: "Hellø",
438 out: "Hll",
439 outFull: "Hllø",
440 err: transform.ErrShortDst,
441 errSpan: transform.ErrEndOfSpan,
442 t: remove,
443 },
444 19: {
445 szDst: 8,
446 atEOF: false,
447 in: "\x80Hello\uFF24\x80",
448 out: "\ufffdHll",
449 outFull: "\ufffdHll\ufffd",
450 err: transform.ErrShortDst,
451 errSpan: transform.ErrEndOfSpan,
452 t: remove,
453 },
454 20: {
455 szDst: 8,
456 atEOF: false,
457 in: "Hllll",
458 out: "Hllll",
459 outFull: "Hllll",
460 t: remove,
461 }} {
462 tt.check(t, i)
463 }
464 }
465
466 func TestReplaceIllFormed(t *testing.T) {
467 replace := ReplaceIllFormed()
468
469 for i, tt := range []transformTest{
470 0: {
471 szDst: large,
472 atEOF: true,
473 in: "",
474 out: "",
475 outFull: "",
476 t: replace,
477 },
478 1: {
479 szDst: 1,
480 atEOF: true,
481 in: "aa",
482 out: "a",
483 outFull: "aa",
484 err: transform.ErrShortDst,
485 t: replace,
486 },
487 2: {
488 szDst: 1,
489 atEOF: true,
490 in: "a\x80",
491 out: "a",
492 outFull: "a\ufffd",
493 err: transform.ErrShortDst,
494 errSpan: transform.ErrEndOfSpan,
495 t: replace,
496 },
497 3: {
498 szDst: 1,
499 atEOF: true,
500 in: "a\xc2",
501 out: "a",
502 outFull: "a\ufffd",
503 err: transform.ErrShortDst,
504 errSpan: transform.ErrEndOfSpan,
505 t: replace,
506 },
507 4: {
508 szDst: large,
509 atEOF: true,
510 in: "\x80",
511 out: "\ufffd",
512 outFull: "\ufffd",
513 errSpan: transform.ErrEndOfSpan,
514 t: replace,
515 },
516 5: {
517 szDst: large,
518 atEOF: false,
519 in: "\x80",
520 out: "\ufffd",
521 outFull: "\ufffd",
522 errSpan: transform.ErrEndOfSpan,
523 t: replace,
524 },
525 6: {
526 szDst: large,
527 atEOF: true,
528 in: "\xc2",
529 out: "\ufffd",
530 outFull: "\ufffd",
531 errSpan: transform.ErrEndOfSpan,
532 t: replace,
533 },
534 7: {
535 szDst: large,
536 atEOF: false,
537 in: "\xc2",
538 out: "",
539 outFull: "\ufffd",
540 err: transform.ErrShortSrc,
541 errSpan: transform.ErrShortSrc,
542 t: replace,
543 },
544 8: {
545 szDst: large,
546 atEOF: true,
547 in: "Hello world!",
548 out: "Hello world!",
549 outFull: "Hello world!",
550 t: replace,
551 },
552 9: {
553 szDst: large,
554 atEOF: true,
555 in: "Hello\x80 w\x80orl\xc2d!\xc2",
556 out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
557 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
558 errSpan: transform.ErrEndOfSpan,
559 t: replace,
560 },
561 10: {
562 szDst: large,
563 atEOF: false,
564 in: "Hello\x80 w\x80orl\xc2d!\xc2",
565 out: "Hello\ufffd w\ufffdorl\ufffdd!",
566 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
567 err: transform.ErrShortSrc,
568 errSpan: transform.ErrEndOfSpan,
569 t: replace,
570 },
571 16: {
572 szDst: 10,
573 atEOF: false,
574 in: "\x80Hello\x80",
575 out: "\ufffdHello",
576 outFull: "\ufffdHello\ufffd",
577 err: transform.ErrShortDst,
578 errSpan: transform.ErrEndOfSpan,
579 t: replace,
580 },
581 17: {
582 szDst: 10,
583 atEOF: false,
584 in: "\ufffdHello\ufffd",
585 out: "\ufffdHello",
586 outFull: "\ufffdHello\ufffd",
587 err: transform.ErrShortDst,
588 t: replace,
589 },
590 } {
591 tt.check(t, i)
592 }
593 }
594
595 func TestMapAlloc(t *testing.T) {
596 if n := testtext.AllocsPerRun(3, func() {
597 Map(idem).Transform(nil, nil, false)
598 }); n > 0 {
599 t.Errorf("got %f; want 0", n)
600 }
601 }
602
// rmNop is a predicate that rejects every rune: it always returns false.
func rmNop(r rune) bool { return false }
604
605 func TestRemoveAlloc(t *testing.T) {
606 if n := testtext.AllocsPerRun(3, func() {
607 Remove(Predicate(rmNop)).Transform(nil, nil, false)
608 }); n > 0 {
609 t.Errorf("got %f; want 0", n)
610 }
611 }
612
613 func TestReplaceIllFormedAlloc(t *testing.T) {
614 if n := testtext.AllocsPerRun(3, func() {
615 ReplaceIllFormed().Transform(nil, nil, false)
616 }); n > 0 {
617 t.Errorf("got %f; want 0", n)
618 }
619 }
620
621 func doBench(b *testing.B, t Transformer) {
622 for _, bc := range []struct{ name, data string }{
623 {"ascii", testtext.ASCII},
624 {"3byte", testtext.ThreeByteUTF8},
625 } {
626 dst := make([]byte, 2*len(bc.data))
627 src := []byte(bc.data)
628
629 testtext.Bench(b, bc.name+"/transform", func(b *testing.B) {
630 b.SetBytes(int64(len(src)))
631 for i := 0; i < b.N; i++ {
632 t.Transform(dst, src, true)
633 }
634 })
635 src = t.Bytes(src)
636 t.Reset()
637 testtext.Bench(b, bc.name+"/span", func(b *testing.B) {
638 b.SetBytes(int64(len(src)))
639 for i := 0; i < b.N; i++ {
640 t.Span(src, true)
641 }
642 })
643 }
644 }
645
646 func BenchmarkRemove(b *testing.B) {
647 doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' })))
648 }
649
650 func BenchmarkMapAll(b *testing.B) {
651 doBench(b, Map(func(r rune) rune { return 'a' }))
652 }
653
654 func BenchmarkMapNone(b *testing.B) {
655 doBench(b, Map(func(r rune) rune { return r }))
656 }
657
658 func BenchmarkReplaceIllFormed(b *testing.B) {
659 doBench(b, ReplaceIllFormed())
660 }
661
var (
	// input is a sample sentence containing non-ASCII letters, repeated 100
	// times. NOTE(review): it is not referenced in the visible part of this
	// file; presumably another file in the package uses it.
	input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100)
)
665
View as plain text