1
2
3
4
5 package charmap
6
7 import (
8 "testing"
9
10 "golang.org/x/text/encoding"
11 "golang.org/x/text/encoding/internal"
12 "golang.org/x/text/encoding/internal/enctest"
13 "golang.org/x/text/transform"
14 )
15
16 func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
17 return "Decode", e.NewDecoder(), nil
18 }
19
20 func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
21 return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
22 }
23
24 func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
25 return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f)
26 }
27
28 func TestNonRepertoire(t *testing.T) {
29 testCases := []struct {
30 init func(e encoding.Encoding) (string, transform.Transformer, error)
31 e encoding.Encoding
32 src, want string
33 }{
34 {dec, Windows1252, "\x81", "\ufffd"},
35
36 {encEBCDIC, CodePage037, "갂", ""},
37
38 {encEBCDIC, CodePage1047, "갂", ""},
39 {encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},
40
41 {encEBCDIC, CodePage1140, "갂", ""},
42 {encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},
43
44 {encASCIISuperset, Windows1252, "갂", ""},
45 {encASCIISuperset, Windows1252, "a갂", "a"},
46 {encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
47 }
48 for _, tc := range testCases {
49 dir, tr, wantErr := tc.init(tc.e)
50
51 dst, _, err := transform.String(tr, tc.src)
52 if err != wantErr {
53 t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
54 }
55 if got := string(dst); got != tc.want {
56 t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
57 }
58 }
59 }
60
61 func TestBasics(t *testing.T) {
62 testCases := []struct {
63 e encoding.Encoding
64 encoded string
65 utf8 string
66 }{{
67 e: CodePage037,
68 encoded: "\xc8\x51\xba\x93\xcf",
69 utf8: "Hé[lõ",
70 }, {
71 e: CodePage437,
72 encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
73 utf8: "Héllô ¥º⌠£╛",
74 }, {
75 e: CodePage866,
76 encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
77 utf8: "Hє╙o Ш¤Я▌б",
78 }, {
79 e: CodePage1047,
80 encoded: "\xc8\x54\x93\x93\x9f",
81 utf8: "Hèll¤",
82 }, {
83 e: CodePage1140,
84 encoded: "\xc8\x9f\x93\x93\xcf",
85 utf8: "H€llõ",
86 }, {
87 e: ISO8859_2,
88 encoded: "Hel\xe5\xf5",
89 utf8: "Helĺő",
90 }, {
91 e: ISO8859_3,
92 encoded: "He\xbd\xd4",
93 utf8: "He½Ô",
94 }, {
95 e: ISO8859_4,
96 encoded: "Hel\xb6\xf8",
97 utf8: "Helļø",
98 }, {
99 e: ISO8859_5,
100 encoded: "H\xd7\xc6o",
101 utf8: "HзЦo",
102 }, {
103 e: ISO8859_6,
104 encoded: "Hel\xc2\xc9",
105 utf8: "Helآة",
106 }, {
107 e: ISO8859_7,
108 encoded: "H\xeel\xebo",
109 utf8: "Hξlλo",
110 }, {
111 e: ISO8859_8,
112 encoded: "Hel\xf5\xed",
113 utf8: "Helץם",
114 }, {
115 e: ISO8859_9,
116 encoded: "\xdeayet",
117 utf8: "Şayet",
118 }, {
119 e: ISO8859_10,
120 encoded: "H\xea\xbfo",
121 utf8: "Hęŋo",
122 }, {
123 e: ISO8859_13,
124 encoded: "H\xe6l\xf9o",
125 utf8: "Hęlło",
126 }, {
127 e: ISO8859_14,
128 encoded: "He\xfe\xd0o",
129 utf8: "HeŷŴo",
130 }, {
131 e: ISO8859_15,
132 encoded: "H\xa4ll\xd8",
133 utf8: "H€llØ",
134 }, {
135 e: ISO8859_16,
136 encoded: "H\xe6ll\xbd",
137 utf8: "Hællœ",
138 }, {
139 e: KOI8R,
140 encoded: "He\x93\xad\x9c",
141 utf8: "He⌠╜°",
142 }, {
143 e: KOI8U,
144 encoded: "He\x93\xad\x9c",
145 utf8: "He⌠ґ°",
146 }, {
147 e: Macintosh,
148 encoded: "He\xdf\xd7",
149 utf8: "Hefl◊",
150 }, {
151 e: MacintoshCyrillic,
152 encoded: "He\xbe\x94",
153 utf8: "HeЊФ",
154 }, {
155 e: Windows874,
156 encoded: "He\xb7\xf0",
157 utf8: "Heท๐",
158 }, {
159 e: Windows1250,
160 encoded: "He\xe5\xe5o",
161 utf8: "Heĺĺo",
162 }, {
163 e: Windows1251,
164 encoded: "H\xball\xfe",
165 utf8: "Hєllю",
166 }, {
167 e: Windows1252,
168 encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
169 utf8: "Héllô ¥º®£Ð",
170 }, {
171 e: Windows1253,
172 encoded: "H\xe5ll\xd6",
173 utf8: "HεllΦ",
174 }, {
175 e: Windows1254,
176 encoded: "\xd0ello",
177 utf8: "Ğello",
178 }, {
179 e: Windows1255,
180 encoded: "He\xd4o",
181 utf8: "Heװo",
182 }, {
183 e: Windows1256,
184 encoded: "H\xdbllo",
185 utf8: "Hغllo",
186 }, {
187 e: Windows1257,
188 encoded: "He\xeflo",
189 utf8: "Heļlo",
190 }, {
191 e: Windows1258,
192 encoded: "Hell\xf5",
193 utf8: "Hellơ",
194 }, {
195 e: XUserDefined,
196 encoded: "\x00\x40\x7f\x80\xab\xff",
197 utf8: "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
198 }}
199
200 for _, tc := range testCases {
201 enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
202 }
203 }
204
205 var windows1255TestCases = []struct {
206 b byte
207 ok bool
208 r rune
209 }{
210 {'\x00', true, '\u0000'},
211 {'\x1a', true, '\u001a'},
212 {'\x61', true, '\u0061'},
213 {'\x7f', true, '\u007f'},
214 {'\x80', true, '\u20ac'},
215 {'\x95', true, '\u2022'},
216 {'\xa0', true, '\u00a0'},
217 {'\xc0', true, '\u05b0'},
218 {'\xfc', true, '\ufffd'},
219 {'\xfd', true, '\u200e'},
220 {'\xfe', true, '\u200f'},
221 {'\xff', true, '\ufffd'},
222 {encoding.ASCIISub, false, '\u0400'},
223 {encoding.ASCIISub, false, '\u2603'},
224 {encoding.ASCIISub, false, '\U0001f4a9'},
225 }
226
227 func TestDecodeByte(t *testing.T) {
228 for _, tc := range windows1255TestCases {
229 if !tc.ok {
230 continue
231 }
232
233 got := Windows1255.DecodeByte(tc.b)
234 want := tc.r
235 if got != want {
236 t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, want)
237 }
238 }
239 }
240
241 func TestEncodeRune(t *testing.T) {
242 for _, tc := range windows1255TestCases {
243
244 if tc.r == '\ufffd' {
245 continue
246 }
247
248 gotB, gotOK := Windows1255.EncodeRune(tc.r)
249 wantB, wantOK := tc.b, tc.ok
250 if gotB != wantB || gotOK != wantOK {
251 t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, gotB, gotOK, wantB, wantOK)
252 }
253 }
254 }
255
256 func TestFiles(t *testing.T) { enctest.TestFile(t, Windows1252) }
257
258 func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, Windows1252) }
259
View as plain text