...

Source file src/golang.org/x/text/encoding/unicode/utf32/utf32_test.go

Documentation: golang.org/x/text/encoding/unicode/utf32

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package utf32
     6  
     7  import (
     8  	"testing"
     9  
    10  	"golang.org/x/text/encoding"
    11  	"golang.org/x/text/encoding/internal/enctest"
    12  	"golang.org/x/text/transform"
    13  )
    14  
    15  var (
    16  	utf32LEIB = UTF32(LittleEndian, IgnoreBOM) // UTF-32LE (atypical interpretation)
    17  	utf32LEUB = UTF32(LittleEndian, UseBOM)    // UTF-32, LE
    18  	//	utf32LEEB = UTF32(LittleEndian, ExpectBOM) // UTF-32, LE, Expect - covered in encoding_test.go
    19  	utf32BEIB = UTF32(BigEndian, IgnoreBOM) // UTF-32BE (atypical interpretation)
    20  	utf32BEUB = UTF32(BigEndian, UseBOM)    // UTF-32 default
    21  	utf32BEEB = UTF32(BigEndian, ExpectBOM) // UTF-32 Expect
    22  )
    23  
    24  func TestBasics(t *testing.T) {
    25  	testCases := []struct {
    26  		e         encoding.Encoding
    27  		encPrefix string
    28  		encSuffix string
    29  		encoded   string
    30  		utf8      string
    31  	}{{
    32  		e:       utf32BEIB,
    33  		encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65",
    34  		utf8:    "\x57\u00e4\U0001d565",
    35  	}, {
    36  		e:         UTF32(BigEndian, ExpectBOM),
    37  		encPrefix: "\x00\x00\xfe\xff",
    38  		encoded:   "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65",
    39  		utf8:      "\x57\u00e4\U0001d565",
    40  	}, {
    41  		e:       UTF32(LittleEndian, IgnoreBOM),
    42  		encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00",
    43  		utf8:    "\x57\u00e4\U0001d565",
    44  	}, {
    45  		e:         UTF32(LittleEndian, ExpectBOM),
    46  		encPrefix: "\xff\xfe\x00\x00",
    47  		encoded:   "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00",
    48  		utf8:      "\x57\u00e4\U0001d565",
    49  	}}
    50  
    51  	for _, tc := range testCases {
    52  		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
    53  	}
    54  }
    55  
    56  func TestFiles(t *testing.T) { enctest.TestFile(t, utf32BEIB) }
    57  
    58  func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, utf32BEIB) }
    59  
    60  func TestUTF32(t *testing.T) {
    61  	testCases := []struct {
    62  		desc    string
    63  		src     string
    64  		notEOF  bool // the inverse of atEOF
    65  		sizeDst int
    66  		want    string
    67  		nSrc    int
    68  		err     error
    69  		t       transform.Transformer
    70  	}{{
    71  		desc: "utf-32 IgnoreBOM dec: empty string",
    72  		t:    utf32BEIB.NewDecoder(),
    73  	}, {
    74  		desc: "utf-32 UseBOM dec: empty string",
    75  		t:    utf32BEUB.NewDecoder(),
    76  	}, {
    77  		desc: "utf-32 ExpectBOM dec: empty string",
    78  		err:  ErrMissingBOM,
    79  		t:    utf32BEEB.NewDecoder(),
    80  	}, {
    81  		desc:    "utf-32be dec: Doesn't interpret U+FEFF as BOM",
    82  		src:     "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
    83  		sizeDst: 100,
    84  		want:    "\uFEFF\U00012345=Ra",
    85  		nSrc:    20,
    86  		t:       utf32BEIB.NewDecoder(),
    87  	}, {
    88  		desc:    "utf-32be dec: Interprets little endian U+FEFF as invalid",
    89  		src:     "\xFF\xFE\x00\x00\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
    90  		sizeDst: 100,
    91  		want:    "\uFFFD\U00012345=Ra",
    92  		nSrc:    20,
    93  		t:       utf32BEIB.NewDecoder(),
    94  	}, {
    95  		desc:    "utf-32le dec: Doesn't interpret U+FEFF as BOM",
    96  		src:     "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
    97  		sizeDst: 100,
    98  		want:    "\uFEFF\U00012345=Ra",
    99  		nSrc:    20,
   100  		t:       utf32LEIB.NewDecoder(),
   101  	}, {
   102  		desc:    "utf-32le dec: Interprets big endian U+FEFF as invalid",
   103  		src:     "\x00\x00\xFE\xFF\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
   104  		sizeDst: 100,
   105  		want:    "\uFFFD\U00012345=Ra",
   106  		nSrc:    20,
   107  		t:       utf32LEIB.NewDecoder(),
   108  	}, {
   109  		desc:    "utf-32 enc: Writes big-endian BOM",
   110  		src:     "\U00012345=Ra",
   111  		sizeDst: 100,
   112  		want:    "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
   113  		nSrc:    7,
   114  		t:       utf32BEUB.NewEncoder(),
   115  	}, {
   116  		desc:    "utf-32 enc: Writes little-endian BOM",
   117  		src:     "\U00012345=Ra",
   118  		sizeDst: 100,
   119  		want:    "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
   120  		nSrc:    7,
   121  		t:       utf32LEUB.NewEncoder(),
   122  	}, {
   123  		desc:    "utf-32 dec: Interprets text using big-endian default when BOM not present",
   124  		src:     "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
   125  		sizeDst: 100,
   126  		want:    "\U00012345=Ra",
   127  		nSrc:    16,
   128  		t:       utf32BEUB.NewDecoder(),
   129  	}, {
   130  		desc:    "utf-32 dec: Interprets text using little-endian default when BOM not present",
   131  		src:     "\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
   132  		sizeDst: 100,
   133  		want:    "\U00012345=Ra",
   134  		nSrc:    16,
   135  		t:       utf32LEUB.NewDecoder(),
   136  	}, {
   137  		desc:    "utf-32 dec: BOM determines encoding BE",
   138  		src:     "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
   139  		sizeDst: 100,
   140  		want:    "\U00012345=Ra",
   141  		nSrc:    20,
   142  		t:       utf32BEUB.NewDecoder(),
   143  	}, {
   144  		desc:    "utf-32 dec: BOM determines encoding LE",
   145  		src:     "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
   146  		sizeDst: 100,
   147  		want:    "\U00012345=Ra",
   148  		nSrc:    20,
   149  		t:       utf32LEUB.NewDecoder(),
   150  	}, {
   151  		desc:    "utf-32 dec: BOM determines encoding LE, change default",
   152  		src:     "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
   153  		sizeDst: 100,
   154  		want:    "\U00012345=Ra",
   155  		nSrc:    20,
   156  		t:       utf32BEUB.NewDecoder(),
   157  	}, {
   158  		desc:    "utf-32 dec: BOM determines encoding BE, change default",
   159  		src:     "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
   160  		sizeDst: 100,
   161  		want:    "\U00012345=Ra",
   162  		nSrc:    20,
   163  		t:       utf32LEUB.NewDecoder(),
   164  	}, {
   165  		desc:    "utf-32 dec: Don't change big-endian byte order mid-stream",
   166  		src:     "\x00\x01\x23\x45\x00\x00\x00\x3D\xFF\xFE\x00\x00\x00\x00\xFE\xFF\x00\x00\x00\x52\x00\x00\x00\x61",
   167  		sizeDst: 100,
   168  		want:    "\U00012345=\uFFFD\uFEFFRa",
   169  		nSrc:    24,
   170  		t:       utf32BEUB.NewDecoder(),
   171  	}, {
   172  		desc:    "utf-32 dec: Don't change little-endian byte order mid-stream",
   173  		src:     "\x45\x23\x01\x00\x3D\x00\x00\x00\x00\x00\xFE\xFF\xFF\xFE\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
   174  		sizeDst: 100,
   175  		want:    "\U00012345=\uFFFD\uFEFFRa",
   176  		nSrc:    24,
   177  		t:       utf32LEUB.NewDecoder(),
   178  	}, {
   179  		desc:    "utf-32 dec: Fail on missing BOM when required",
   180  		src:     "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
   181  		sizeDst: 100,
   182  		want:    "",
   183  		nSrc:    0,
   184  		err:     ErrMissingBOM,
   185  		t:       utf32BEEB.NewDecoder(),
   186  	}, {
   187  		desc:    "utf-32 enc: Short dst",
   188  		src:     "\U00012345=Ra",
   189  		sizeDst: 15,
   190  		want:    "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52",
   191  		nSrc:    6,
   192  		err:     transform.ErrShortDst,
   193  		t:       utf32BEIB.NewEncoder(),
   194  	}, {
   195  		desc:    "utf-32 enc: Short src",
   196  		src:     "\U00012345=Ra\xC2",
   197  		notEOF:  true,
   198  		sizeDst: 100,
   199  		want:    "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
   200  		nSrc:    7,
   201  		err:     transform.ErrShortSrc,
   202  		t:       utf32BEIB.NewEncoder(),
   203  	}, {
   204  		desc:    "utf-32 enc: Invalid input",
   205  		src:     "\x80\xC1\xC2\x7F\xC2",
   206  		sizeDst: 100,
   207  		want:    "\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\x00\x7F\x00\x00\xFF\xFD",
   208  		nSrc:    5,
   209  		t:       utf32BEIB.NewEncoder(),
   210  	}, {
   211  		desc:    "utf-32 dec: Short dst",
   212  		src:     "\x00\x00\x00\x41",
   213  		sizeDst: 0,
   214  		want:    "",
   215  		nSrc:    0,
   216  		err:     transform.ErrShortDst,
   217  		t:       utf32BEIB.NewDecoder(),
   218  	}, {
   219  		desc:    "utf-32 dec: Short src",
   220  		src:     "\x00\x00\x00",
   221  		notEOF:  true,
   222  		sizeDst: 4,
   223  		want:    "",
   224  		nSrc:    0,
   225  		err:     transform.ErrShortSrc,
   226  		t:       utf32BEIB.NewDecoder(),
   227  	}, {
   228  		desc:    "utf-32 dec: Invalid input",
   229  		src:     "\x00\x00\xD8\x00\x00\x00\xDF\xFF\x00\x11\x00\x00\x00\x00\x00",
   230  		sizeDst: 100,
   231  		want:    "\uFFFD\uFFFD\uFFFD\uFFFD",
   232  		nSrc:    15,
   233  		t:       utf32BEIB.NewDecoder(),
   234  	}}
   235  	for i, tc := range testCases {
   236  		b := make([]byte, tc.sizeDst)
   237  		nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
   238  		if err != tc.err {
   239  			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
   240  		}
   241  		if got := string(b[:nDst]); got != tc.want {
   242  			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
   243  		}
   244  		if nSrc != tc.nSrc {
   245  			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
   246  		}
   247  	}
   248  }
   249  

View as plain text