...

Source file src/golang.org/x/text/encoding/charmap/charmap_test.go

Documentation: golang.org/x/text/encoding/charmap

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package charmap
     6  
     7  import (
     8  	"testing"
     9  
    10  	"golang.org/x/text/encoding"
    11  	"golang.org/x/text/encoding/internal"
    12  	"golang.org/x/text/encoding/internal/enctest"
    13  	"golang.org/x/text/transform"
    14  )
    15  
    16  func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
    17  	return "Decode", e.NewDecoder(), nil
    18  }
    19  
    20  func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
    21  	return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
    22  }
    23  
    24  func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
    25  	return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f)
    26  }
    27  
    28  func TestNonRepertoire(t *testing.T) {
    29  	testCases := []struct {
    30  		init      func(e encoding.Encoding) (string, transform.Transformer, error)
    31  		e         encoding.Encoding
    32  		src, want string
    33  	}{
    34  		{dec, Windows1252, "\x81", "\ufffd"},
    35  
    36  		{encEBCDIC, CodePage037, "갂", ""},
    37  
    38  		{encEBCDIC, CodePage1047, "갂", ""},
    39  		{encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},
    40  
    41  		{encEBCDIC, CodePage1140, "갂", ""},
    42  		{encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},
    43  
    44  		{encASCIISuperset, Windows1252, "갂", ""},
    45  		{encASCIISuperset, Windows1252, "a갂", "a"},
    46  		{encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
    47  	}
    48  	for _, tc := range testCases {
    49  		dir, tr, wantErr := tc.init(tc.e)
    50  
    51  		dst, _, err := transform.String(tr, tc.src)
    52  		if err != wantErr {
    53  			t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
    54  		}
    55  		if got := string(dst); got != tc.want {
    56  			t.Errorf("%s %v(%q):\ngot  %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
    57  		}
    58  	}
    59  }
    60  
    61  func TestBasics(t *testing.T) {
    62  	testCases := []struct {
    63  		e       encoding.Encoding
    64  		encoded string
    65  		utf8    string
    66  	}{{
    67  		e:       CodePage037,
    68  		encoded: "\xc8\x51\xba\x93\xcf",
    69  		utf8:    "Hé[lõ",
    70  	}, {
    71  		e:       CodePage437,
    72  		encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
    73  		utf8:    "Héllô ¥º⌠£╛",
    74  	}, {
    75  		e:       CodePage866,
    76  		encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
    77  		utf8:    "Hє╙o Ш¤Я▌б",
    78  	}, {
    79  		e:       CodePage1047,
    80  		encoded: "\xc8\x54\x93\x93\x9f",
    81  		utf8:    "Hèll¤",
    82  	}, {
    83  		e:       CodePage1140,
    84  		encoded: "\xc8\x9f\x93\x93\xcf",
    85  		utf8:    "H€llõ",
    86  	}, {
    87  		e:       ISO8859_2,
    88  		encoded: "Hel\xe5\xf5",
    89  		utf8:    "Helĺő",
    90  	}, {
    91  		e:       ISO8859_3,
    92  		encoded: "He\xbd\xd4",
    93  		utf8:    "He½Ô",
    94  	}, {
    95  		e:       ISO8859_4,
    96  		encoded: "Hel\xb6\xf8",
    97  		utf8:    "Helļø",
    98  	}, {
    99  		e:       ISO8859_5,
   100  		encoded: "H\xd7\xc6o",
   101  		utf8:    "HзЦo",
   102  	}, {
   103  		e:       ISO8859_6,
   104  		encoded: "Hel\xc2\xc9",
   105  		utf8:    "Helآة",
   106  	}, {
   107  		e:       ISO8859_7,
   108  		encoded: "H\xeel\xebo",
   109  		utf8:    "Hξlλo",
   110  	}, {
   111  		e:       ISO8859_8,
   112  		encoded: "Hel\xf5\xed",
   113  		utf8:    "Helץם",
   114  	}, {
   115  		e:       ISO8859_9,
   116  		encoded: "\xdeayet",
   117  		utf8:    "Şayet",
   118  	}, {
   119  		e:       ISO8859_10,
   120  		encoded: "H\xea\xbfo",
   121  		utf8:    "Hęŋo",
   122  	}, {
   123  		e:       ISO8859_13,
   124  		encoded: "H\xe6l\xf9o",
   125  		utf8:    "Hęlło",
   126  	}, {
   127  		e:       ISO8859_14,
   128  		encoded: "He\xfe\xd0o",
   129  		utf8:    "HeŷŴo",
   130  	}, {
   131  		e:       ISO8859_15,
   132  		encoded: "H\xa4ll\xd8",
   133  		utf8:    "H€llØ",
   134  	}, {
   135  		e:       ISO8859_16,
   136  		encoded: "H\xe6ll\xbd",
   137  		utf8:    "Hællœ",
   138  	}, {
   139  		e:       KOI8R,
   140  		encoded: "He\x93\xad\x9c",
   141  		utf8:    "He⌠╜°",
   142  	}, {
   143  		e:       KOI8U,
   144  		encoded: "He\x93\xad\x9c",
   145  		utf8:    "He⌠ґ°",
   146  	}, {
   147  		e:       Macintosh,
   148  		encoded: "He\xdf\xd7",
   149  		utf8:    "Hefl◊",
   150  	}, {
   151  		e:       MacintoshCyrillic,
   152  		encoded: "He\xbe\x94",
   153  		utf8:    "HeЊФ",
   154  	}, {
   155  		e:       Windows874,
   156  		encoded: "He\xb7\xf0",
   157  		utf8:    "Heท๐",
   158  	}, {
   159  		e:       Windows1250,
   160  		encoded: "He\xe5\xe5o",
   161  		utf8:    "Heĺĺo",
   162  	}, {
   163  		e:       Windows1251,
   164  		encoded: "H\xball\xfe",
   165  		utf8:    "Hєllю",
   166  	}, {
   167  		e:       Windows1252,
   168  		encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
   169  		utf8:    "Héllô ¥º®£Ð",
   170  	}, {
   171  		e:       Windows1253,
   172  		encoded: "H\xe5ll\xd6",
   173  		utf8:    "HεllΦ",
   174  	}, {
   175  		e:       Windows1254,
   176  		encoded: "\xd0ello",
   177  		utf8:    "Ğello",
   178  	}, {
   179  		e:       Windows1255,
   180  		encoded: "He\xd4o",
   181  		utf8:    "Heװo",
   182  	}, {
   183  		e:       Windows1256,
   184  		encoded: "H\xdbllo",
   185  		utf8:    "Hغllo",
   186  	}, {
   187  		e:       Windows1257,
   188  		encoded: "He\xeflo",
   189  		utf8:    "Heļlo",
   190  	}, {
   191  		e:       Windows1258,
   192  		encoded: "Hell\xf5",
   193  		utf8:    "Hellơ",
   194  	}, {
   195  		e:       XUserDefined,
   196  		encoded: "\x00\x40\x7f\x80\xab\xff",
   197  		utf8:    "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
   198  	}}
   199  
   200  	for _, tc := range testCases {
   201  		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
   202  	}
   203  }
   204  
   205  var windows1255TestCases = []struct {
   206  	b  byte
   207  	ok bool
   208  	r  rune
   209  }{
   210  	{'\x00', true, '\u0000'},
   211  	{'\x1a', true, '\u001a'},
   212  	{'\x61', true, '\u0061'},
   213  	{'\x7f', true, '\u007f'},
   214  	{'\x80', true, '\u20ac'},
   215  	{'\x95', true, '\u2022'},
   216  	{'\xa0', true, '\u00a0'},
   217  	{'\xc0', true, '\u05b0'},
   218  	{'\xfc', true, '\ufffd'},
   219  	{'\xfd', true, '\u200e'},
   220  	{'\xfe', true, '\u200f'},
   221  	{'\xff', true, '\ufffd'},
   222  	{encoding.ASCIISub, false, '\u0400'},
   223  	{encoding.ASCIISub, false, '\u2603'},
   224  	{encoding.ASCIISub, false, '\U0001f4a9'},
   225  }
   226  
   227  func TestDecodeByte(t *testing.T) {
   228  	for _, tc := range windows1255TestCases {
   229  		if !tc.ok {
   230  			continue
   231  		}
   232  
   233  		got := Windows1255.DecodeByte(tc.b)
   234  		want := tc.r
   235  		if got != want {
   236  			t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, want)
   237  		}
   238  	}
   239  }
   240  
   241  func TestEncodeRune(t *testing.T) {
   242  	for _, tc := range windows1255TestCases {
   243  		// There can be multiple tc.b values that map to tc.r = '\ufffd'.
   244  		if tc.r == '\ufffd' {
   245  			continue
   246  		}
   247  
   248  		gotB, gotOK := Windows1255.EncodeRune(tc.r)
   249  		wantB, wantOK := tc.b, tc.ok
   250  		if gotB != wantB || gotOK != wantOK {
   251  			t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, gotB, gotOK, wantB, wantOK)
   252  		}
   253  	}
   254  }
   255  
   256  func TestFiles(t *testing.T) { enctest.TestFile(t, Windows1252) }
   257  
   258  func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, Windows1252) }
   259  

View as plain text