1
2
3
4
5 package unicode
6
7 import (
8 "testing"
9
10 "golang.org/x/text/encoding"
11 "golang.org/x/text/encoding/charmap"
12 "golang.org/x/text/encoding/internal/enctest"
13 "golang.org/x/text/transform"
14 )
15
16 func TestBasics(t *testing.T) {
17 testCases := []struct {
18 e encoding.Encoding
19 encPrefix string
20 encSuffix string
21 encoded string
22 utf8 string
23 }{{
24 e: utf16BEIB,
25 encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
26 utf8: "\x57\u00e4\U0001d565",
27 }, {
28 e: utf16BEEB,
29 encPrefix: "\xfe\xff",
30 encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
31 utf8: "\x57\u00e4\U0001d565",
32 }, {
33 e: utf16LEIB,
34 encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
35 utf8: "\x57\u00e4\U0001d565",
36 }, {
37 e: utf16LEEB,
38 encPrefix: "\xff\xfe",
39 encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
40 utf8: "\x57\u00e4\U0001d565",
41 }}
42
43 for _, tc := range testCases {
44 enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
45 }
46 }
47
48 func TestFiles(t *testing.T) {
49 enctest.TestFile(t, UTF8)
50 enctest.TestFile(t, utf16LEIB)
51 }
52
53 func BenchmarkEncoding(b *testing.B) {
54 enctest.Benchmark(b, UTF8)
55 enctest.Benchmark(b, utf16LEIB)
56 }
57
58 var (
59 utf16LEIB = UTF16(LittleEndian, IgnoreBOM)
60 utf16LEUB = UTF16(LittleEndian, UseBOM)
61 utf16LEEB = UTF16(LittleEndian, ExpectBOM)
62 utf16BEIB = UTF16(BigEndian, IgnoreBOM)
63 utf16BEUB = UTF16(BigEndian, UseBOM)
64 utf16BEEB = UTF16(BigEndian, ExpectBOM)
65 )
66
67 func TestUTF16(t *testing.T) {
68 testCases := []struct {
69 desc string
70 src string
71 notEOF bool
72 sizeDst int
73 want string
74 nSrc int
75 err error
76 t transform.Transformer
77 }{{
78 desc: "utf-16 IgnoreBOM dec: empty string",
79 t: utf16BEIB.NewDecoder(),
80 }, {
81 desc: "utf-16 UseBOM dec: empty string",
82 t: utf16BEUB.NewDecoder(),
83 }, {
84 desc: "utf-16 ExpectBOM dec: empty string",
85 err: ErrMissingBOM,
86 t: utf16BEEB.NewDecoder(),
87 }, {
88 desc: "utf-16 dec: BOM determines encoding BE (RFC 2781:3.3)",
89 src: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
90 sizeDst: 100,
91 want: "\U00012345=Ra",
92 nSrc: 12,
93 t: utf16BEUB.NewDecoder(),
94 }, {
95 desc: "utf-16 dec: BOM determines encoding LE (RFC 2781:3.3)",
96 src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
97 sizeDst: 100,
98 want: "\U00012345=Ra",
99 nSrc: 12,
100 t: utf16LEUB.NewDecoder(),
101 }, {
102 desc: "utf-16 dec: BOM determines encoding LE, change default (RFC 2781:3.3)",
103 src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
104 sizeDst: 100,
105 want: "\U00012345=Ra",
106 nSrc: 12,
107 t: utf16BEUB.NewDecoder(),
108 }, {
109 desc: "utf-16 dec: Fail on missing BOM when required",
110 src: "\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x00\x52\x00\x61",
111 sizeDst: 100,
112 want: "",
113 nSrc: 0,
114 err: ErrMissingBOM,
115 t: utf16BEEB.NewDecoder(),
116 }, {
117 desc: "utf-16 dec: Fail on single byte missing BOM when required",
118 src: "\x00",
119 sizeDst: 4,
120 t: utf16BEEB.NewDecoder(),
121 err: ErrMissingBOM,
122 }, {
123 desc: "utf-16 dec: Fail on short src missing BOM when required",
124 src: "\x00",
125 notEOF: true,
126 sizeDst: 4,
127 t: utf16BEEB.NewDecoder(),
128 err: transform.ErrShortSrc,
129 }, {
130 desc: "utf-16 dec: SHOULD interpret text as big-endian when BOM not present (RFC 2781:4.3)",
131 src: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
132 sizeDst: 100,
133 want: "\U00012345=Ra",
134 nSrc: 10,
135 t: utf16BEUB.NewDecoder(),
136 }, {
137 desc: "utf-16 dec: incorrect UTF-16: odd bytes",
138 src: "\x00",
139 sizeDst: 100,
140 want: "\uFFFD",
141 nSrc: 1,
142 t: utf16BEUB.NewDecoder(),
143 }, {
144 desc: "utf-16 dec: Fail on incorrect UTF-16: short source odd bytes",
145 src: "\x00",
146 notEOF: true,
147 sizeDst: 100,
148 t: utf16BEUB.NewDecoder(),
149 err: transform.ErrShortSrc,
150 }, {
151
152
153 desc: "utf-16le dec: incorrect BOM is an error (RFC 2781:4.1)",
154 src: "\xFE\xFF\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
155 sizeDst: 100,
156 want: "\uFFFE\U00012345=Ra",
157 nSrc: 12,
158 t: utf16LEIB.NewDecoder(),
159 }, {
160 desc: "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
161 src: "\U00012345=Ra",
162 sizeDst: 100,
163 want: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
164 nSrc: 7,
165 t: utf16LEUB.NewEncoder(),
166 }, {
167 desc: "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
168 src: "\U00012345=Ra",
169 sizeDst: 100,
170 want: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
171 nSrc: 7,
172 t: utf16BEUB.NewEncoder(),
173 }, {
174 desc: "utf-16le enc: MUST NOT write BOM (RFC 2781:3.3)",
175 src: "\U00012345=Ra",
176 sizeDst: 100,
177 want: "\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
178 nSrc: 7,
179 t: utf16LEIB.NewEncoder(),
180 }, {
181 desc: "utf-16be dec: incorrect UTF-16: odd bytes",
182 src: "\x00",
183 sizeDst: 100,
184 want: "\uFFFD",
185 nSrc: 1,
186 t: utf16BEIB.NewDecoder(),
187 }, {
188 desc: "utf-16be dec: unpaired surrogate, odd bytes",
189 src: "\xD8\x45\x00",
190 sizeDst: 100,
191 want: "\uFFFD\uFFFD",
192 nSrc: 3,
193 t: utf16BEIB.NewDecoder(),
194 }, {
195 desc: "utf-16be dec: unpaired low surrogate + valid text",
196 src: "\xD8\x45\x00a",
197 sizeDst: 100,
198 want: "\uFFFDa",
199 nSrc: 4,
200 t: utf16BEIB.NewDecoder(),
201 }, {
202 desc: "utf-16be dec: unpaired low surrogate + valid text + single byte",
203 src: "\xD8\x45\x00ab",
204 sizeDst: 100,
205 want: "\uFFFDa\uFFFD",
206 nSrc: 5,
207 t: utf16BEIB.NewDecoder(),
208 }, {
209 desc: "utf-16le dec: unpaired high surrogate",
210 src: "\x00\x00\x00\xDC\x12\xD8",
211 sizeDst: 100,
212 want: "\x00\uFFFD\uFFFD",
213 nSrc: 6,
214 t: utf16LEIB.NewDecoder(),
215 }, {
216 desc: "utf-16be dec: two unpaired low surrogates",
217 src: "\xD8\x45\xD8\x12",
218 sizeDst: 100,
219 want: "\uFFFD\uFFFD",
220 nSrc: 4,
221 t: utf16BEIB.NewDecoder(),
222 }, {
223 desc: "utf-16be dec: short dst",
224 src: "\x00a",
225 sizeDst: 0,
226 want: "",
227 nSrc: 0,
228 t: utf16BEIB.NewDecoder(),
229 err: transform.ErrShortDst,
230 }, {
231 desc: "utf-16be dec: short dst surrogate",
232 src: "\xD8\xF5\xDC\x12",
233 sizeDst: 3,
234 want: "",
235 nSrc: 0,
236 t: utf16BEIB.NewDecoder(),
237 err: transform.ErrShortDst,
238 }, {
239 desc: "utf-16be dec: short dst trailing byte",
240 src: "\x00",
241 sizeDst: 2,
242 want: "",
243 nSrc: 0,
244 t: utf16BEIB.NewDecoder(),
245 err: transform.ErrShortDst,
246 }, {
247 desc: "utf-16be dec: short src",
248 src: "\x00",
249 notEOF: true,
250 sizeDst: 3,
251 want: "",
252 nSrc: 0,
253 t: utf16BEIB.NewDecoder(),
254 err: transform.ErrShortSrc,
255 }, {
256 desc: "utf-16 enc",
257 src: "\U00012345=Ra",
258 sizeDst: 100,
259 want: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
260 nSrc: 7,
261 t: utf16BEUB.NewEncoder(),
262 }, {
263 desc: "utf-16 enc: short dst normal",
264 src: "\U00012345=Ra",
265 sizeDst: 9,
266 want: "\xD8\x08\xDF\x45\x00\x3D\x00\x52",
267 nSrc: 6,
268 t: utf16BEIB.NewEncoder(),
269 err: transform.ErrShortDst,
270 }, {
271 desc: "utf-16 enc: short dst surrogate",
272 src: "\U00012345=Ra",
273 sizeDst: 3,
274 want: "",
275 nSrc: 0,
276 t: utf16BEIB.NewEncoder(),
277 err: transform.ErrShortDst,
278 }, {
279 desc: "utf-16 enc: short src",
280 src: "\U00012345=Ra\xC2",
281 notEOF: true,
282 sizeDst: 100,
283 want: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
284 nSrc: 7,
285 t: utf16BEIB.NewEncoder(),
286 err: transform.ErrShortSrc,
287 }, {
288 desc: "utf-16be dec: don't change byte order mid-stream",
289 src: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\xFF\xFE\x00\x52\x00\x61",
290 sizeDst: 100,
291 want: "\U00012345=\ufffeRa",
292 nSrc: 14,
293 t: utf16BEUB.NewDecoder(),
294 }, {
295 desc: "utf-16le dec: don't change byte order mid-stream",
296 src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x52\x00\x61\x00",
297 sizeDst: 100,
298 want: "\U00012345=\ufeff\ufffeRa",
299 nSrc: 16,
300 t: utf16LEUB.NewDecoder(),
301 }}
302 for i, tc := range testCases {
303 for j := 0; j < 2; j++ {
304 b := make([]byte, tc.sizeDst)
305 nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
306 if err != tc.err {
307 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
308 }
309 if got := string(b[:nDst]); got != tc.want {
310 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
311 }
312 if nSrc != tc.nSrc {
313 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
314 }
315
316
317 if err == nil {
318 break
319 }
320 }
321 }
322 }
323
324 func TestUTF8Decoder(t *testing.T) {
325 testCases := []struct {
326 desc string
327 src string
328 notEOF bool
329 sizeDst int
330 want string
331 nSrc int
332 err error
333 }{{
334 desc: "empty string, empty dest buffer",
335 }, {
336 desc: "empty string",
337 sizeDst: 8,
338 }, {
339 desc: "empty string, streaming",
340 notEOF: true,
341 sizeDst: 8,
342 }, {
343 desc: "ascii",
344 src: "abcde",
345 sizeDst: 8,
346 want: "abcde",
347 nSrc: 5,
348 }, {
349 desc: "ascii and error",
350 src: "ab\x80de",
351 sizeDst: 7,
352 want: "ab\ufffdde",
353 nSrc: 5,
354 }, {
355 desc: "valid two-byte sequence",
356 src: "a\u0300bc",
357 sizeDst: 7,
358 want: "a\u0300bc",
359 nSrc: 5,
360 }, {
361 desc: "valid three-byte sequence",
362 src: "a\u0300中",
363 sizeDst: 7,
364 want: "a\u0300中",
365 nSrc: 6,
366 }, {
367 desc: "valid four-byte sequence",
368 src: "a中\U00016F50",
369 sizeDst: 8,
370 want: "a中\U00016F50",
371 nSrc: 8,
372 }, {
373 desc: "short source buffer",
374 src: "abc\xf0\x90",
375 notEOF: true,
376 sizeDst: 10,
377 want: "abc",
378 nSrc: 3,
379 err: transform.ErrShortSrc,
380 }, {
381
382
383 desc: "complete invalid that looks like short at end",
384 src: "abc\xf0\x80",
385 notEOF: true,
386 sizeDst: 10,
387 want: "abc",
388 nSrc: 3,
389 err: transform.ErrShortSrc,
390 }, {
391 desc: "incomplete sequence at end",
392 src: "a\x80bc\xf0\x90",
393 sizeDst: 9,
394 want: "a\ufffdbc\ufffd",
395 nSrc: 6,
396 }, {
397 desc: "invalid second byte",
398 src: "abc\xf0dddd",
399 sizeDst: 10,
400 want: "abc\ufffddddd",
401 nSrc: 8,
402 }, {
403 desc: "invalid second byte at end",
404 src: "abc\xf0d",
405 sizeDst: 10,
406 want: "abc\ufffdd",
407 nSrc: 5,
408 }, {
409 desc: "invalid third byte",
410 src: "a\u0300bc\xf0\x90dddd",
411 sizeDst: 12,
412 want: "a\u0300bc\ufffddddd",
413 nSrc: 11,
414 }, {
415 desc: "invalid third byte at end",
416 src: "a\u0300bc\xf0\x90d",
417 sizeDst: 12,
418 want: "a\u0300bc\ufffdd",
419 nSrc: 8,
420 }, {
421 desc: "invalid fourth byte, tight buffer",
422 src: "a\u0300bc\xf0\x90\x80d",
423 sizeDst: 9,
424 want: "a\u0300bc\ufffdd",
425 nSrc: 9,
426 }, {
427 desc: "invalid fourth byte at end",
428 src: "a\u0300bc\xf0\x90\x80",
429 sizeDst: 8,
430 want: "a\u0300bc\ufffd",
431 nSrc: 8,
432 }, {
433 desc: "invalid fourth byte and short four byte sequence",
434 src: "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
435 notEOF: true,
436 sizeDst: 20,
437 want: "a\u0300bc\ufffd",
438 nSrc: 8,
439 err: transform.ErrShortSrc,
440 }, {
441 desc: "valid four-byte sequence overflowing short buffer",
442 src: "a\u0300bc\xf0\x90\x80\x80",
443 notEOF: true,
444 sizeDst: 8,
445 want: "a\u0300bc",
446 nSrc: 5,
447 err: transform.ErrShortDst,
448 }, {
449 desc: "invalid fourth byte at end short, but short dst",
450 src: "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
451 notEOF: true,
452 sizeDst: 8,
453
454
455 want: "a\u0300bc",
456 nSrc: 5,
457 err: transform.ErrShortDst,
458 }, {
459 desc: "short dst for error",
460 src: "abc\x80",
461 notEOF: true,
462 sizeDst: 5,
463 want: "abc",
464 nSrc: 3,
465 err: transform.ErrShortDst,
466 }, {
467 desc: "adjusting short dst buffer",
468 src: "abc\x80ef",
469 notEOF: true,
470 sizeDst: 6,
471 want: "abc\ufffd",
472 nSrc: 4,
473 err: transform.ErrShortDst,
474 }}
475 tr := UTF8.NewDecoder()
476 for i, tc := range testCases {
477 b := make([]byte, tc.sizeDst)
478 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF)
479 if err != tc.err {
480 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
481 }
482 if got := string(b[:nDst]); got != tc.want {
483 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
484 }
485 if nSrc != tc.nSrc {
486 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
487 }
488 }
489 }
490
491 func TestUTF8BOMDecoder(t *testing.T) {
492 testCases := []struct {
493 desc string
494 src string
495 notEOF bool
496 sizeDst int
497 want string
498 nSrc int
499 err error
500 wantAll string
501 }{{
502 desc: "empty string, empty dest buffer",
503 }, {
504 desc: "empty string",
505 sizeDst: 8,
506 }, {
507 desc: "empty string, streaming",
508 notEOF: true,
509 sizeDst: 8,
510 }, {
511 desc: "ascii",
512 src: "abcde",
513 sizeDst: 8,
514 want: "abcde",
515 nSrc: 5,
516 wantAll: "abcde",
517 }, {
518 desc: "ascii with bom",
519 src: utf8BOM + "abcde",
520 sizeDst: 11,
521 want: "abcde",
522 nSrc: 8,
523 wantAll: "abcde",
524 }, {
525 desc: "error with bom",
526 src: utf8BOM + "ab\x80de",
527 sizeDst: 11,
528 want: "ab\ufffdde",
529 nSrc: 8,
530 wantAll: "ab\ufffdde",
531 }, {
532 desc: "short bom",
533 src: utf8BOM[:2],
534 notEOF: true,
535 sizeDst: 7,
536 want: "",
537 nSrc: 0,
538 wantAll: "\ufffd",
539 err: transform.ErrShortSrc,
540 }, {
541 desc: "short bom at end",
542 src: utf8BOM[:2],
543 sizeDst: 7,
544 want: "\ufffd",
545 nSrc: 2,
546 wantAll: "\ufffd",
547 err: nil,
548 }, {
549 desc: "short source buffer",
550 src: "abc\xf0\x90",
551 notEOF: true,
552 sizeDst: 10,
553 want: "abc",
554 nSrc: 3,
555 wantAll: "abc\ufffd",
556 err: transform.ErrShortSrc,
557 }, {
558 desc: "short source buffer with bom",
559 src: utf8BOM + "abc\xf0\x90",
560 notEOF: true,
561 sizeDst: 15,
562 want: "abc",
563 nSrc: 6,
564 wantAll: "abc\ufffd",
565 err: transform.ErrShortSrc,
566 }, {
567 desc: "short dst for error",
568 src: utf8BOM + "abc\x80",
569 notEOF: true,
570 sizeDst: 5,
571 want: "abc",
572 nSrc: 6,
573 wantAll: "abc\ufffd",
574 err: transform.ErrShortDst,
575 }}
576 tr := UTF8BOM.NewDecoder()
577 for i, tc := range testCases {
578 tr.Reset()
579 b := make([]byte, tc.sizeDst)
580 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF)
581 if err != tc.err {
582 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
583 }
584 if got := string(b[:nDst]); got != tc.want {
585 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
586 }
587 if nSrc != tc.nSrc {
588 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
589 }
590 if got, _ := tr.String(tc.src); got != tc.wantAll {
591 t.Errorf("%d:%s: String was %s; want %s", i, tc.desc, got, tc.wantAll)
592 }
593 }
594 }
595
596 func TestUTF8SigEncoder(t *testing.T) {
597 testCases := []struct {
598 desc string
599 src string
600 notEOF bool
601 sizeDst int
602 want string
603 wantAll string
604 nSrc int
605 err error
606 }{{
607 desc: "empty string, empty dest buffer",
608 err: transform.ErrShortDst,
609 wantAll: utf8BOM,
610 }, {
611 desc: "empty string",
612 sizeDst: 8,
613 want: utf8BOM,
614 wantAll: utf8BOM,
615 }, {
616 desc: "empty string, streaming",
617 notEOF: true,
618 sizeDst: 8,
619 want: utf8BOM,
620 wantAll: utf8BOM,
621 }, {
622 desc: "ascii",
623 src: "abcde",
624 sizeDst: 8,
625 want: utf8BOM + "abcde",
626 nSrc: 5,
627 wantAll: utf8BOM + "abcde",
628 }, {
629 desc: "short bom at end",
630 src: utf8BOM[:2],
631 sizeDst: 11,
632 want: utf8BOM + "\ufffd",
633 nSrc: 2,
634 wantAll: utf8BOM + "\ufffd",
635 }, {
636 desc: "short bom",
637 src: utf8BOM[:2],
638 notEOF: true,
639 sizeDst: 7,
640 want: utf8BOM,
641 nSrc: 0,
642 err: transform.ErrShortSrc,
643 wantAll: utf8BOM + "\ufffd",
644 }, {
645 desc: "short bom at end",
646 src: utf8BOM[:2],
647 sizeDst: 7,
648 want: utf8BOM + "\ufffd",
649 nSrc: 2,
650 err: nil,
651 wantAll: utf8BOM + "\ufffd",
652 }, {
653 desc: "short dst buffer 2",
654 src: "ab",
655 sizeDst: 2,
656 want: "",
657 nSrc: 0,
658 err: transform.ErrShortDst,
659 wantAll: utf8BOM + "ab",
660 }, {
661 desc: "short dst buffer 3",
662 src: "ab",
663 sizeDst: 3,
664 want: utf8BOM,
665 nSrc: 0,
666 err: transform.ErrShortDst,
667 wantAll: utf8BOM + "ab",
668 }, {
669 desc: "short dst buffer 4",
670 src: "ab",
671 sizeDst: 4,
672 want: utf8BOM + "a",
673 nSrc: 1,
674 err: transform.ErrShortDst,
675 wantAll: utf8BOM + "ab",
676 }}
677 tr := UTF8BOM.NewEncoder()
678 for i, tc := range testCases {
679 tr.Reset()
680 b := make([]byte, tc.sizeDst)
681 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF)
682 if err != tc.err {
683 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
684 }
685 if got := string(b[:nDst]); got != tc.want {
686 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
687 }
688 if nSrc != tc.nSrc {
689 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
690 }
691 if got, _ := tr.String(tc.src); got != tc.wantAll {
692 t.Errorf("%d:%s: String was %s; want %s", i, tc.desc, got, tc.wantAll)
693 }
694 }
695 }
696
697 func TestBOMOverride(t *testing.T) {
698 dec := BOMOverride(charmap.CodePage437.NewDecoder())
699 dst := make([]byte, 100)
700 for i, tc := range []struct {
701 src string
702 atEOF bool
703 dst string
704 nSrc int
705 err error
706 }{
707 0: {"H\x82ll\x93", true, "Héllô", 5, nil},
708 1: {"\uFEFFHéllö", true, "Héllö", 10, nil},
709 2: {"\xFE\xFF\x00H\x00e\x00l\x00l\x00o", true, "Hello", 12, nil},
710 3: {"\xFF\xFEH\x00e\x00l\x00l\x00o\x00", true, "Hello", 12, nil},
711 4: {"\uFEFF", true, "", 3, nil},
712 5: {"\xFE\xFF", true, "", 2, nil},
713 6: {"\xFF\xFE", true, "", 2, nil},
714 7: {"\xEF\xBB", true, "\u2229\u2557", 2, nil},
715 8: {"\xEF", true, "\u2229", 1, nil},
716 9: {"", true, "", 0, nil},
717 10: {"\xFE", true, "\u25a0", 1, nil},
718 11: {"\xFF", true, "\u00a0", 1, nil},
719 12: {"\xEF\xBB", false, "", 0, transform.ErrShortSrc},
720 13: {"\xEF", false, "", 0, transform.ErrShortSrc},
721 14: {"", false, "", 0, transform.ErrShortSrc},
722 15: {"\xFE", false, "", 0, transform.ErrShortSrc},
723 16: {"\xFF", false, "", 0, transform.ErrShortSrc},
724 17: {"\xFF\xFE", false, "", 0, transform.ErrShortSrc},
725 } {
726 dec.Reset()
727 nDst, nSrc, err := dec.Transform(dst, []byte(tc.src), tc.atEOF)
728 got := string(dst[:nDst])
729 if nSrc != tc.nSrc {
730 t.Errorf("%d: nSrc: got %d; want %d", i, nSrc, tc.nSrc)
731 }
732 if got != tc.dst {
733 t.Errorf("%d: got %+q; want %+q", i, got, tc.dst)
734 }
735 if err != tc.err {
736 t.Errorf("%d: error: got %v; want %v", i, err, tc.err)
737 }
738 }
739 }
740
View as plain text