1
2
3
4
5
6 package runes
7
8 import (
9 "unicode"
10 "unicode/utf8"
11
12 "golang.org/x/text/transform"
13 )
14
15
// A Set describes a collection of runes through a single membership test.
type Set interface {
	// Contains reports whether r belongs to the set.
	Contains(r rune) bool
}
20
// setFunc adapts an ordinary rune predicate so that it provides a Contains
// method.
type setFunc func(rune) bool

// Contains reports whether r is in the set by invoking the wrapped predicate.
func (f setFunc) Contains(r rune) bool {
	return f(r)
}
26
27
28
29
30
31
32 func In(rt *unicode.RangeTable) Set {
33 return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
34 }
35
36
37
38 func NotIn(rt *unicode.RangeTable) Set {
39 return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
40 }
41
42
43 func Predicate(f func(rune) bool) Set {
44 return setFunc(f)
45 }
46
47
48 type Transformer struct {
49 t transform.SpanningTransformer
50 }
51
52 func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
53 return t.t.Transform(dst, src, atEOF)
54 }
55
56 func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
57 return t.t.Span(b, atEOF)
58 }
59
60 func (t Transformer) Reset() { t.t.Reset() }
61
62
63
64
65 func (t Transformer) Bytes(b []byte) []byte {
66 b, _, err := transform.Bytes(t, b)
67 if err != nil {
68 return nil
69 }
70 return b
71 }
72
73
74
75
76 func (t Transformer) String(s string) string {
77 s, _, err := transform.String(t, s)
78 if err != nil {
79 return ""
80 }
81 return s
82 }
83
84
85
86
87
88
// runeErrorString is the UTF-8 encoding of utf8.RuneError. The transformers
// in this file copy its three bytes into the output when they replace an
// ill-formed input byte.
const runeErrorString string = string(utf8.RuneError)
90
91
92
93 func Remove(s Set) Transformer {
94 if f, ok := s.(setFunc); ok {
95
96
97
98 return Transformer{remove(f)}
99 }
100 return Transformer{remove(s.Contains)}
101 }
102
103
104
// remove is a rune predicate with transformer methods attached; a rune for
// which the function returns true is the one its Span and Transform methods
// treat as removable.
type remove func(r rune) bool

// Reset does nothing.
func (remove) Reset() {
}
108
109
110 func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
111 for r, size := rune(0), 0; n < len(src); {
112 if r = rune(src[n]); r < utf8.RuneSelf {
113 size = 1
114 } else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
115
116 if !atEOF && !utf8.FullRune(src[n:]) {
117 err = transform.ErrShortSrc
118 } else {
119 err = transform.ErrEndOfSpan
120 }
121 break
122 }
123 if t(r) {
124 err = transform.ErrEndOfSpan
125 break
126 }
127 n += size
128 }
129 return
130 }
131
132
133 func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
134 for r, size := rune(0), 0; nSrc < len(src); {
135 if r = rune(src[nSrc]); r < utf8.RuneSelf {
136 size = 1
137 } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
138
139 if !atEOF && !utf8.FullRune(src[nSrc:]) {
140 err = transform.ErrShortSrc
141 break
142 }
143
144
145
146
147 if !t(utf8.RuneError) {
148 if nDst+3 > len(dst) {
149 err = transform.ErrShortDst
150 break
151 }
152 dst[nDst+0] = runeErrorString[0]
153 dst[nDst+1] = runeErrorString[1]
154 dst[nDst+2] = runeErrorString[2]
155 nDst += 3
156 }
157 nSrc++
158 continue
159 }
160 if t(r) {
161 nSrc += size
162 continue
163 }
164 if nDst+size > len(dst) {
165 err = transform.ErrShortDst
166 break
167 }
168 for i := 0; i < size; i++ {
169 dst[nDst] = src[nSrc]
170 nDst++
171 nSrc++
172 }
173 }
174 return
175 }
176
177
178
179
180 func Map(mapping func(rune) rune) Transformer {
181 return Transformer{mapper(mapping)}
182 }
183
// mapper is a rune-to-rune function with transformer methods attached.
type mapper func(rune) rune

// Reset does nothing.
func (mapper) Reset() {
}
187
188
189 func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
190 for r, size := rune(0), 0; n < len(src); n += size {
191 if r = rune(src[n]); r < utf8.RuneSelf {
192 size = 1
193 } else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
194
195 if !atEOF && !utf8.FullRune(src[n:]) {
196 err = transform.ErrShortSrc
197 } else {
198 err = transform.ErrEndOfSpan
199 }
200 break
201 }
202 if t(r) != r {
203 err = transform.ErrEndOfSpan
204 break
205 }
206 }
207 return n, err
208 }
209
210
211 func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
212 var replacement rune
213 var b [utf8.UTFMax]byte
214
215 for r, size := rune(0), 0; nSrc < len(src); {
216 if r = rune(src[nSrc]); r < utf8.RuneSelf {
217 if replacement = t(r); replacement < utf8.RuneSelf {
218 if nDst == len(dst) {
219 err = transform.ErrShortDst
220 break
221 }
222 dst[nDst] = byte(replacement)
223 nDst++
224 nSrc++
225 continue
226 }
227 size = 1
228 } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
229
230 if !atEOF && !utf8.FullRune(src[nSrc:]) {
231 err = transform.ErrShortSrc
232 break
233 }
234
235 if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
236 if nDst+3 > len(dst) {
237 err = transform.ErrShortDst
238 break
239 }
240 dst[nDst+0] = runeErrorString[0]
241 dst[nDst+1] = runeErrorString[1]
242 dst[nDst+2] = runeErrorString[2]
243 nDst += 3
244 nSrc++
245 continue
246 }
247 } else if replacement = t(r); replacement == r {
248 if nDst+size > len(dst) {
249 err = transform.ErrShortDst
250 break
251 }
252 for i := 0; i < size; i++ {
253 dst[nDst] = src[nSrc]
254 nDst++
255 nSrc++
256 }
257 continue
258 }
259
260 n := utf8.EncodeRune(b[:], replacement)
261
262 if nDst+n > len(dst) {
263 err = transform.ErrShortDst
264 break
265 }
266 for i := 0; i < n; i++ {
267 dst[nDst] = b[i]
268 nDst++
269 }
270 nSrc += size
271 }
272 return
273 }
274
275
276
277 func ReplaceIllFormed() Transformer {
278 return Transformer{&replaceIllFormed{}}
279 }
280
281 type replaceIllFormed struct{ transform.NopResetter }
282
283 func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
284 for n < len(src) {
285
286 if src[n] < utf8.RuneSelf {
287 n++
288 continue
289 }
290
291 r, size := utf8.DecodeRune(src[n:])
292
293
294 if r != utf8.RuneError || size != 1 {
295 n += size
296 continue
297 }
298
299
300 if !atEOF && !utf8.FullRune(src[n:]) {
301 err = transform.ErrShortSrc
302 break
303 }
304
305
306 err = transform.ErrEndOfSpan
307 break
308 }
309 return n, err
310 }
311
312 func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
313 for nSrc < len(src) {
314
315 if r := src[nSrc]; r < utf8.RuneSelf {
316 if nDst == len(dst) {
317 err = transform.ErrShortDst
318 break
319 }
320 dst[nDst] = r
321 nDst++
322 nSrc++
323 continue
324 }
325
326
327 if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
328 if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
329 err = transform.ErrShortDst
330 break
331 }
332 nDst += size
333 nSrc += size
334 continue
335 }
336
337
338 if !atEOF && !utf8.FullRune(src[nSrc:]) {
339 err = transform.ErrShortSrc
340 break
341 }
342
343
344 if nDst+3 > len(dst) {
345 err = transform.ErrShortDst
346 break
347 }
348 dst[nDst+0] = runeErrorString[0]
349 dst[nDst+1] = runeErrorString[1]
350 dst[nDst+2] = runeErrorString[2]
351 nDst += 3
352 nSrc++
353 }
354 return nDst, nSrc, err
355 }
356
View as plain text