1
2
3
4
5 package encoding_test
6
7 import (
8 "io"
9 "strings"
10 "testing"
11
12 "golang.org/x/text/encoding"
13 "golang.org/x/text/encoding/charmap"
14 "golang.org/x/text/transform"
15 )
16
17 func TestEncodeInvalidUTF8(t *testing.T) {
18 inputs := []string{
19 "hello.",
20 "wo\ufffdld.",
21 "ABC\xff\x80\x80",
22 "\x80\x80\x80\x80\x80",
23 "\x80\x80D\x80\x80",
24 "E\xed\xa0\x80\xed\xbf\xbfF",
25 "G",
26 "H\xe2\x82",
27 "\xacI\xe2\x82",
28 }
29
30 want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
31
32 transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
33 gotBuf := make([]byte, 0, 1024)
34 src := make([]byte, 0, 1024)
35 for i, input := range inputs {
36 dst := make([]byte, 1024)
37 src = append(src, input...)
38 atEOF := i == len(inputs)-1
39 nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
40 gotBuf = append(gotBuf, dst[:nDst]...)
41 src = src[nSrc:]
42 if err != nil && err != transform.ErrShortSrc {
43 t.Fatalf("i=%d: %v", i, err)
44 }
45 if atEOF && err != nil {
46 t.Fatalf("i=%d: atEOF: %v", i, err)
47 }
48 }
49 if got := string(gotBuf); got != want {
50 t.Fatalf("\ngot %+q\nwant %+q", got, want)
51 }
52 }
53
54 func TestReplacement(t *testing.T) {
55 for _, direction := range []string{"Decode", "Encode"} {
56 enc, want := (transform.Transformer)(nil), ""
57 if direction == "Decode" {
58 enc = encoding.Replacement.NewDecoder()
59 want = "\ufffd"
60 } else {
61 enc = encoding.Replacement.NewEncoder()
62 want = "AB\x00CD\ufffdYZ"
63 }
64 sr := strings.NewReader("AB\x00CD\x80YZ")
65 g, err := io.ReadAll(transform.NewReader(sr, enc))
66 if err != nil {
67 t.Errorf("%s: ReadAll: %v", direction, err)
68 continue
69 }
70 if got := string(g); got != want {
71 t.Errorf("%s:\ngot %q\nwant %q", direction, got, want)
72 continue
73 }
74 }
75 }
76
77 func TestUTF8Validator(t *testing.T) {
78 testCases := []struct {
79 desc string
80 dstSize int
81 src string
82 atEOF bool
83 want string
84 wantErr error
85 }{
86 {
87 "empty input",
88 100,
89 "",
90 false,
91 "",
92 nil,
93 },
94 {
95 "valid 1-byte 1-rune input",
96 100,
97 "a",
98 false,
99 "a",
100 nil,
101 },
102 {
103 "valid 3-byte 1-rune input",
104 100,
105 "\u1234",
106 false,
107 "\u1234",
108 nil,
109 },
110 {
111 "valid 5-byte 3-rune input",
112 100,
113 "a\u0100\u0101",
114 false,
115 "a\u0100\u0101",
116 nil,
117 },
118 {
119 "perfectly sized dst (non-ASCII)",
120 5,
121 "a\u0100\u0101",
122 false,
123 "a\u0100\u0101",
124 nil,
125 },
126 {
127 "short dst (non-ASCII)",
128 4,
129 "a\u0100\u0101",
130 false,
131 "a\u0100",
132 transform.ErrShortDst,
133 },
134 {
135 "perfectly sized dst (ASCII)",
136 5,
137 "abcde",
138 false,
139 "abcde",
140 nil,
141 },
142 {
143 "short dst (ASCII)",
144 4,
145 "abcde",
146 false,
147 "abcd",
148 transform.ErrShortDst,
149 },
150 {
151 "partial input (!EOF)",
152 100,
153 "a\u0100\xf1",
154 false,
155 "a\u0100",
156 transform.ErrShortSrc,
157 },
158 {
159 "invalid input (EOF)",
160 100,
161 "a\u0100\xf1",
162 true,
163 "a\u0100",
164 encoding.ErrInvalidUTF8,
165 },
166 {
167 "invalid input (!EOF)",
168 100,
169 "a\u0100\x80",
170 false,
171 "a\u0100",
172 encoding.ErrInvalidUTF8,
173 },
174 {
175 "invalid input (above U+10FFFF)",
176 100,
177 "a\u0100\xf7\xbf\xbf\xbf",
178 false,
179 "a\u0100",
180 encoding.ErrInvalidUTF8,
181 },
182 {
183 "invalid input (surrogate half)",
184 100,
185 "a\u0100\xed\xa0\x80",
186 false,
187 "a\u0100",
188 encoding.ErrInvalidUTF8,
189 },
190 }
191 for _, tc := range testCases {
192 dst := make([]byte, tc.dstSize)
193 nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
194 if nDst < 0 || len(dst) < nDst {
195 t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
196 continue
197 }
198 got := string(dst[:nDst])
199 if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
200 t.Errorf("%s:\ngot %+q, %d, %v\nwant %+q, %d, %v",
201 tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
202 continue
203 }
204 }
205 }
206
207 func TestErrorHandler(t *testing.T) {
208 testCases := []struct {
209 desc string
210 handler func(*encoding.Encoder) *encoding.Encoder
211 sizeDst int
212 src, want string
213 nSrc int
214 err error
215 }{
216 {
217 desc: "one rune replacement",
218 handler: encoding.ReplaceUnsupported,
219 sizeDst: 100,
220 src: "\uAC00",
221 want: "\x1a",
222 nSrc: 3,
223 },
224 {
225 desc: "mid-stream rune replacement",
226 handler: encoding.ReplaceUnsupported,
227 sizeDst: 100,
228 src: "a\uAC00bcd\u00e9",
229 want: "a\x1abcd\xe9",
230 nSrc: 9,
231 },
232 {
233 desc: "at end rune replacement",
234 handler: encoding.ReplaceUnsupported,
235 sizeDst: 10,
236 src: "\u00e9\uAC00",
237 want: "\xe9\x1a",
238 nSrc: 5,
239 },
240 {
241 desc: "short buffer replacement",
242 handler: encoding.ReplaceUnsupported,
243 sizeDst: 1,
244 src: "\u00e9\uAC00",
245 want: "\xe9",
246 nSrc: 2,
247 err: transform.ErrShortDst,
248 },
249 {
250 desc: "one rune html escape",
251 handler: encoding.HTMLEscapeUnsupported,
252 sizeDst: 100,
253 src: "\uAC00",
254 want: "가",
255 nSrc: 3,
256 },
257 {
258 desc: "mid-stream html escape",
259 handler: encoding.HTMLEscapeUnsupported,
260 sizeDst: 100,
261 src: "\u00e9\uAC00dcba",
262 want: "\xe9가dcba",
263 nSrc: 9,
264 },
265 {
266 desc: "short buffer html escape",
267 handler: encoding.HTMLEscapeUnsupported,
268 sizeDst: 9,
269 src: "ab\uAC01",
270 want: "ab",
271 nSrc: 2,
272 err: transform.ErrShortDst,
273 },
274 }
275 for i, tc := range testCases {
276 tr := tc.handler(charmap.Windows1250.NewEncoder())
277 b := make([]byte, tc.sizeDst)
278 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
279 if err != tc.err {
280 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
281 }
282 if got := string(b[:nDst]); got != tc.want {
283 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
284 }
285 if nSrc != tc.nSrc {
286 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
287 }
288
289 }
290 }
291
View as plain text