...

Source file src/golang.org/x/text/encoding/encoding_test.go

Documentation: golang.org/x/text/encoding

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package encoding_test
     6  
     7  import (
     8  	"io"
     9  	"strings"
    10  	"testing"
    11  
    12  	"golang.org/x/text/encoding"
    13  	"golang.org/x/text/encoding/charmap"
    14  	"golang.org/x/text/transform"
    15  )
    16  
    17  func TestEncodeInvalidUTF8(t *testing.T) {
    18  	inputs := []string{
    19  		"hello.",
    20  		"wo\ufffdld.",
    21  		"ABC\xff\x80\x80", // Invalid UTF-8.
    22  		"\x80\x80\x80\x80\x80",
    23  		"\x80\x80D\x80\x80",          // Valid rune at "D".
    24  		"E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
    25  		"G",
    26  		"H\xe2\x82",     // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
    27  		"\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
    28  	}
    29  	// Each invalid source byte becomes '\x1a'.
    30  	want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
    31  
    32  	transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
    33  	gotBuf := make([]byte, 0, 1024)
    34  	src := make([]byte, 0, 1024)
    35  	for i, input := range inputs {
    36  		dst := make([]byte, 1024)
    37  		src = append(src, input...)
    38  		atEOF := i == len(inputs)-1
    39  		nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
    40  		gotBuf = append(gotBuf, dst[:nDst]...)
    41  		src = src[nSrc:]
    42  		if err != nil && err != transform.ErrShortSrc {
    43  			t.Fatalf("i=%d: %v", i, err)
    44  		}
    45  		if atEOF && err != nil {
    46  			t.Fatalf("i=%d: atEOF: %v", i, err)
    47  		}
    48  	}
    49  	if got := string(gotBuf); got != want {
    50  		t.Fatalf("\ngot  %+q\nwant %+q", got, want)
    51  	}
    52  }
    53  
    54  func TestReplacement(t *testing.T) {
    55  	for _, direction := range []string{"Decode", "Encode"} {
    56  		enc, want := (transform.Transformer)(nil), ""
    57  		if direction == "Decode" {
    58  			enc = encoding.Replacement.NewDecoder()
    59  			want = "\ufffd"
    60  		} else {
    61  			enc = encoding.Replacement.NewEncoder()
    62  			want = "AB\x00CD\ufffdYZ"
    63  		}
    64  		sr := strings.NewReader("AB\x00CD\x80YZ")
    65  		g, err := io.ReadAll(transform.NewReader(sr, enc))
    66  		if err != nil {
    67  			t.Errorf("%s: ReadAll: %v", direction, err)
    68  			continue
    69  		}
    70  		if got := string(g); got != want {
    71  			t.Errorf("%s:\ngot  %q\nwant %q", direction, got, want)
    72  			continue
    73  		}
    74  	}
    75  }
    76  
    77  func TestUTF8Validator(t *testing.T) {
    78  	testCases := []struct {
    79  		desc    string
    80  		dstSize int
    81  		src     string
    82  		atEOF   bool
    83  		want    string
    84  		wantErr error
    85  	}{
    86  		{
    87  			"empty input",
    88  			100,
    89  			"",
    90  			false,
    91  			"",
    92  			nil,
    93  		},
    94  		{
    95  			"valid 1-byte 1-rune input",
    96  			100,
    97  			"a",
    98  			false,
    99  			"a",
   100  			nil,
   101  		},
   102  		{
   103  			"valid 3-byte 1-rune input",
   104  			100,
   105  			"\u1234",
   106  			false,
   107  			"\u1234",
   108  			nil,
   109  		},
   110  		{
   111  			"valid 5-byte 3-rune input",
   112  			100,
   113  			"a\u0100\u0101",
   114  			false,
   115  			"a\u0100\u0101",
   116  			nil,
   117  		},
   118  		{
   119  			"perfectly sized dst (non-ASCII)",
   120  			5,
   121  			"a\u0100\u0101",
   122  			false,
   123  			"a\u0100\u0101",
   124  			nil,
   125  		},
   126  		{
   127  			"short dst (non-ASCII)",
   128  			4,
   129  			"a\u0100\u0101",
   130  			false,
   131  			"a\u0100",
   132  			transform.ErrShortDst,
   133  		},
   134  		{
   135  			"perfectly sized dst (ASCII)",
   136  			5,
   137  			"abcde",
   138  			false,
   139  			"abcde",
   140  			nil,
   141  		},
   142  		{
   143  			"short dst (ASCII)",
   144  			4,
   145  			"abcde",
   146  			false,
   147  			"abcd",
   148  			transform.ErrShortDst,
   149  		},
   150  		{
   151  			"partial input (!EOF)",
   152  			100,
   153  			"a\u0100\xf1",
   154  			false,
   155  			"a\u0100",
   156  			transform.ErrShortSrc,
   157  		},
   158  		{
   159  			"invalid input (EOF)",
   160  			100,
   161  			"a\u0100\xf1",
   162  			true,
   163  			"a\u0100",
   164  			encoding.ErrInvalidUTF8,
   165  		},
   166  		{
   167  			"invalid input (!EOF)",
   168  			100,
   169  			"a\u0100\x80",
   170  			false,
   171  			"a\u0100",
   172  			encoding.ErrInvalidUTF8,
   173  		},
   174  		{
   175  			"invalid input (above U+10FFFF)",
   176  			100,
   177  			"a\u0100\xf7\xbf\xbf\xbf",
   178  			false,
   179  			"a\u0100",
   180  			encoding.ErrInvalidUTF8,
   181  		},
   182  		{
   183  			"invalid input (surrogate half)",
   184  			100,
   185  			"a\u0100\xed\xa0\x80",
   186  			false,
   187  			"a\u0100",
   188  			encoding.ErrInvalidUTF8,
   189  		},
   190  	}
   191  	for _, tc := range testCases {
   192  		dst := make([]byte, tc.dstSize)
   193  		nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
   194  		if nDst < 0 || len(dst) < nDst {
   195  			t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
   196  			continue
   197  		}
   198  		got := string(dst[:nDst])
   199  		if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
   200  			t.Errorf("%s:\ngot  %+q, %d, %v\nwant %+q, %d, %v",
   201  				tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
   202  			continue
   203  		}
   204  	}
   205  }
   206  
   207  func TestErrorHandler(t *testing.T) {
   208  	testCases := []struct {
   209  		desc      string
   210  		handler   func(*encoding.Encoder) *encoding.Encoder
   211  		sizeDst   int
   212  		src, want string
   213  		nSrc      int
   214  		err       error
   215  	}{
   216  		{
   217  			desc:    "one rune replacement",
   218  			handler: encoding.ReplaceUnsupported,
   219  			sizeDst: 100,
   220  			src:     "\uAC00",
   221  			want:    "\x1a",
   222  			nSrc:    3,
   223  		},
   224  		{
   225  			desc:    "mid-stream rune replacement",
   226  			handler: encoding.ReplaceUnsupported,
   227  			sizeDst: 100,
   228  			src:     "a\uAC00bcd\u00e9",
   229  			want:    "a\x1abcd\xe9",
   230  			nSrc:    9,
   231  		},
   232  		{
   233  			desc:    "at end rune replacement",
   234  			handler: encoding.ReplaceUnsupported,
   235  			sizeDst: 10,
   236  			src:     "\u00e9\uAC00",
   237  			want:    "\xe9\x1a",
   238  			nSrc:    5,
   239  		},
   240  		{
   241  			desc:    "short buffer replacement",
   242  			handler: encoding.ReplaceUnsupported,
   243  			sizeDst: 1,
   244  			src:     "\u00e9\uAC00",
   245  			want:    "\xe9",
   246  			nSrc:    2,
   247  			err:     transform.ErrShortDst,
   248  		},
   249  		{
   250  			desc:    "one rune html escape",
   251  			handler: encoding.HTMLEscapeUnsupported,
   252  			sizeDst: 100,
   253  			src:     "\uAC00",
   254  			want:    "&#44032;",
   255  			nSrc:    3,
   256  		},
   257  		{
   258  			desc:    "mid-stream html escape",
   259  			handler: encoding.HTMLEscapeUnsupported,
   260  			sizeDst: 100,
   261  			src:     "\u00e9\uAC00dcba",
   262  			want:    "\xe9&#44032;dcba",
   263  			nSrc:    9,
   264  		},
   265  		{
   266  			desc:    "short buffer html escape",
   267  			handler: encoding.HTMLEscapeUnsupported,
   268  			sizeDst: 9,
   269  			src:     "ab\uAC01",
   270  			want:    "ab",
   271  			nSrc:    2,
   272  			err:     transform.ErrShortDst,
   273  		},
   274  	}
   275  	for i, tc := range testCases {
   276  		tr := tc.handler(charmap.Windows1250.NewEncoder())
   277  		b := make([]byte, tc.sizeDst)
   278  		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
   279  		if err != tc.err {
   280  			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
   281  		}
   282  		if got := string(b[:nDst]); got != tc.want {
   283  			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
   284  		}
   285  		if nSrc != tc.nSrc {
   286  			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
   287  		}
   288  
   289  	}
   290  }
   291  

View as plain text