1
2
3
4
5
6
7
8
9
10
11 package encoding
12
13 import (
14 "errors"
15 "io"
16 "strconv"
17 "unicode/utf8"
18
19 "golang.org/x/text/encoding/internal/identifier"
20 "golang.org/x/text/transform"
21 )
22
23
24
25
26
27
28
29
30
31
// Encoding is the interface satisfied by the encodings in this file (see Nop
// and Replacement): a source of a matching Decoder and Encoder.
//
// NOTE(review): by the package's naming, a Decoder is expected to convert
// from the encoding to UTF-8 and an Encoder from UTF-8 to the encoding —
// confirm against the package documentation.
type Encoding interface {
	// NewDecoder returns a Decoder for this encoding.
	NewDecoder() *Decoder

	// NewEncoder returns an Encoder for this encoding.
	NewEncoder() *Encoder
}
39
40
41
42
43
44
// Decoder bundles a transform.Transformer with the convenience methods Bytes,
// String and Reader, each of which applies that Transformer.
type Decoder struct {
	transform.Transformer

	// The blank field stops code outside this package from building a
	// Decoder with an unkeyed composite literal, so fields can be added
	// later without breaking callers.
	_ struct{}
}
53
54
55
56 func (d *Decoder) Bytes(b []byte) ([]byte, error) {
57 b, _, err := transform.Bytes(d, b)
58 if err != nil {
59 return nil, err
60 }
61 return b, nil
62 }
63
64
65
66 func (d *Decoder) String(s string) (string, error) {
67 s, _, err := transform.String(d, s)
68 if err != nil {
69 return "", err
70 }
71 return s, nil
72 }
73
74
75
76
77
78 func (d *Decoder) Reader(r io.Reader) io.Reader {
79 return transform.NewReader(r, d)
80 }
81
82
83
84
85
86
87
88
// Encoder bundles a transform.Transformer with the convenience methods Bytes,
// String and Writer, each of which applies that Transformer.
type Encoder struct {
	transform.Transformer

	// The blank field stops code outside this package from building an
	// Encoder with an unkeyed composite literal, so fields can be added
	// later without breaking callers.
	_ struct{}
}
97
98
99
100 func (e *Encoder) Bytes(b []byte) ([]byte, error) {
101 b, _, err := transform.Bytes(e, b)
102 if err != nil {
103 return nil, err
104 }
105 return b, nil
106 }
107
108
109
110 func (e *Encoder) String(s string) (string, error) {
111 s, _, err := transform.String(e, s)
112 if err != nil {
113 return "", err
114 }
115 return s, nil
116 }
117
118
119
120
121
122 func (e *Encoder) Writer(w io.Writer) io.Writer {
123 return transform.NewWriter(w, e)
124 }
125
126
127
// ASCIISub is the ASCII substitute (SUB) control character, 0x1A.
//
// NOTE(review): presumably the byte that encoders emit in place of runes
// they cannot represent; no use is visible in this file — confirm.
const ASCIISub = '\x1a'
129
130
131
// Nop is an Encoding whose Decoder and Encoder both wrap transform.Nop.
var Nop Encoding = nop{}

// nop is the stateless type behind Nop.
type nop struct{}
135
136 func (nop) NewDecoder() *Decoder {
137 return &Decoder{Transformer: transform.Nop}
138 }
139 func (nop) NewEncoder() *Encoder {
140 return &Encoder{Transformer: transform.Nop}
141 }
142
143
144
145
146
147
148
// Replacement is an Encoding built on replacementDecoder and
// replacementEncoder. Its decoder consumes all input and writes a single
// U+FFFD (in UTF-8) on the call made with atEOF set. Its encoder copies UTF-8
// through and substitutes U+FFFD for invalid sequences.
var Replacement Encoding = replacement{}

// replacement is the stateless type behind Replacement.
type replacement struct{}
152
153 func (replacement) NewDecoder() *Decoder {
154 return &Decoder{Transformer: replacementDecoder{}}
155 }
156
157 func (replacement) NewEncoder() *Encoder {
158 return &Encoder{Transformer: replacementEncoder{}}
159 }
160
161 func (replacement) ID() (mib identifier.MIB, other string) {
162 return identifier.Replacement, ""
163 }
164
// replacementDecoder is the Transformer behind Replacement's Decoder. It
// embeds transform.NopResetter to supply the Reset method.
type replacementDecoder struct{ transform.NopResetter }
166
167 func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
168 if len(dst) < 3 {
169 return 0, 0, transform.ErrShortDst
170 }
171 if atEOF {
172 const fffd = "\ufffd"
173 dst[0] = fffd[0]
174 dst[1] = fffd[1]
175 dst[2] = fffd[2]
176 nDst = 3
177 }
178 return nDst, len(src), nil
179 }
180
// replacementEncoder is the Transformer behind Replacement's Encoder. It
// embeds transform.NopResetter to supply the Reset method.
type replacementEncoder struct{ transform.NopResetter }
182
183 func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
184 r, size := rune(0), 0
185
186 for ; nSrc < len(src); nSrc += size {
187 r = rune(src[nSrc])
188
189
190 if r < utf8.RuneSelf {
191 size = 1
192
193 } else {
194
195 r, size = utf8.DecodeRune(src[nSrc:])
196 if size == 1 {
197
198
199
200 if !atEOF && !utf8.FullRune(src[nSrc:]) {
201 err = transform.ErrShortSrc
202 break
203 }
204 r = '\ufffd'
205 }
206 }
207
208 if nDst+utf8.RuneLen(r) > len(dst) {
209 err = transform.ErrShortDst
210 break
211 }
212 nDst += utf8.EncodeRune(dst[nDst:], r)
213 }
214 return nDst, nSrc, err
215 }
216
217
218
219
220
221
222
223
224 func HTMLEscapeUnsupported(e *Encoder) *Encoder {
225 return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
226 }
227
228
229
230
231
232
233
234 func ReplaceUnsupported(e *Encoder) *Encoder {
235 return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
236 }
237
// errorHandler wraps an Encoder and passes the repertoireError failures of
// the wrapped Transformer to handler instead of returning them to the caller.
type errorHandler struct {
	*Encoder
	// handler writes a substitute for rune r at the start of dst. It returns
	// the number of bytes written, or ok == false when dst is too small.
	handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
}
242
243
// repertoireError is the error interface that errorHandler intercepts.
//
// NOTE(review): presumably returned by encoders for runes outside the target
// character set; the implementing types are not visible here — confirm.
type repertoireError interface {
	// Replacement returns the single byte that errorToReplacement writes in
	// place of the rune that caused the error.
	Replacement() byte
}
247
248 func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
249 nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
250 for err != nil {
251 rerr, ok := err.(repertoireError)
252 if !ok {
253 return nDst, nSrc, err
254 }
255 r, sz := utf8.DecodeRune(src[nSrc:])
256 n, ok := h.handler(dst[nDst:], r, rerr)
257 if !ok {
258 return nDst, nSrc, transform.ErrShortDst
259 }
260 err = nil
261 nDst += n
262 if nSrc += sz; nSrc < len(src) {
263 var dn, sn int
264 dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
265 nDst += dn
266 nSrc += sn
267 }
268 }
269 return nDst, nSrc, err
270 }
271
272 func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
273 buf := [8]byte{}
274 b := strconv.AppendUint(buf[:0], uint64(r), 10)
275 if n = len(b) + len("&#;"); n >= len(dst) {
276 return 0, false
277 }
278 dst[0] = '&'
279 dst[1] = '#'
280 dst[copy(dst[2:], b)+2] = ';'
281 return n, true
282 }
283
284 func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
285 if len(dst) == 0 {
286 return 0, false
287 }
288 dst[0] = err.Replacement()
289 return 1, true
290 }
291
292
// ErrInvalidUTF8 is returned by UTF8Validator's Transform when its input
// contains invalid UTF-8.
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
294
295
296
// UTF8Validator is a Transformer that copies its input to its output
// unchanged and fails with ErrInvalidUTF8 at the first invalid UTF-8
// sequence.
var UTF8Validator transform.Transformer = utf8Validator{}

// utf8Validator is the stateless type behind UTF8Validator. It embeds
// transform.NopResetter to supply the Reset method.
type utf8Validator struct{ transform.NopResetter }
300
301 func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
302 n := len(src)
303 if n > len(dst) {
304 n = len(dst)
305 }
306 for i := 0; i < n; {
307 if c := src[i]; c < utf8.RuneSelf {
308 dst[i] = c
309 i++
310 continue
311 }
312 _, size := utf8.DecodeRune(src[i:])
313 if size == 1 {
314
315
316
317 err = ErrInvalidUTF8
318 if !atEOF && !utf8.FullRune(src[i:]) {
319 err = transform.ErrShortSrc
320 }
321 return i, i, err
322 }
323 if i+size > len(dst) {
324 return i, i, transform.ErrShortDst
325 }
326 for ; size > 0; size-- {
327 dst[i] = src[i]
328 i++
329 }
330 }
331 if len(src) > len(dst) {
332 err = transform.ErrShortDst
333 }
334 return n, n, err
335 }
336
View as plain text