1
2
3
4
5 package precis
6
7 import (
8 "bytes"
9 "errors"
10 "unicode/utf8"
11
12 "golang.org/x/text/cases"
13 "golang.org/x/text/language"
14 "golang.org/x/text/runes"
15 "golang.org/x/text/secure/bidirule"
16 "golang.org/x/text/transform"
17 "golang.org/x/text/width"
18 )
19
20 var (
21 errDisallowedRune = errors.New("precis: disallowed rune encountered")
22 )
23
24 var dpTrie = newDerivedPropertiesTrie(0)
25
26
27
28 type Profile struct {
29 options
30 class *class
31 }
32
33
34
35
36
37 func NewIdentifier(opts ...Option) *Profile {
38 return &Profile{
39 options: getOpts(opts...),
40 class: identifier,
41 }
42 }
43
44
45
46
47
48 func NewFreeform(opts ...Option) *Profile {
49 return &Profile{
50 options: getOpts(opts...),
51 class: freeform,
52 }
53 }
54
55
56
57
58
59 func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile {
60 p := *parent
61 Disallow(disallow)(&p.options)
62 return &p
63 }
64
65
66
67 func (p *Profile) NewTransformer() *Transformer {
68 var ts []transform.Transformer
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 r := 1
86 if p.options.repeat {
87 r = 4
88 }
89 for ; r > 0; r-- {
90 if p.options.foldWidth {
91 ts = append(ts, width.Fold)
92 }
93
94 for _, f := range p.options.additional {
95 ts = append(ts, f())
96 }
97
98 if p.options.cases != nil {
99 ts = append(ts, p.options.cases)
100 }
101
102 ts = append(ts, p.options.norm)
103
104 if p.options.bidiRule {
105 ts = append(ts, bidirule.New())
106 }
107
108 ts = append(ts, &checker{p: p, allowed: p.Allowed()})
109 }
110
111
112
113 return &Transformer{transform.Chain(ts...)}
114 }
115
// errEmptyString is returned by enforce when the disallowEmpty option is set
// and the processed result has length zero.
var errEmptyString = errors.New("precis: transformation resulted in empty string")

// buffers carries the working state of a single enforcement run.
type buffers struct {
	// src is the current version of the data being processed.
	src []byte
	// buf holds two scratch buffers that apply writes to alternately, so the
	// output of one step never overlaps its input.
	buf [2][]byte
	// next counts the rewrites performed by apply; its low bit selects the
	// scratch buffer for the following rewrite.
	next int
}
123
124 func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
125 n, err := t.Span(b.src, true)
126 if err != transform.ErrEndOfSpan {
127 return err
128 }
129 x := b.next & 1
130 if b.buf[x] == nil {
131 b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
132 }
133 span := append(b.buf[x][:0], b.src[:n]...)
134 b.src, _, err = transform.Append(t, span, b.src[n:])
135 b.buf[x] = b.src
136 b.next++
137 return err
138 }
139
140
141 var (
142 foldWidthT transform.SpanningTransformer = width.Fold
143 lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
144 )
145
146
147
148 func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
149 b.src = src
150
151 ascii := true
152 for _, c := range src {
153 if c >= utf8.RuneSelf {
154 ascii = false
155 break
156 }
157 }
158
159 if ascii {
160 for _, f := range p.options.additional {
161 if err = b.apply(f()); err != nil {
162 return nil, err
163 }
164 }
165 switch {
166 case p.options.asciiLower || (comparing && p.options.ignorecase):
167 for i, c := range b.src {
168 if 'A' <= c && c <= 'Z' {
169 b.src[i] = c ^ 1<<5
170 }
171 }
172 case p.options.cases != nil:
173 b.apply(p.options.cases)
174 }
175 c := checker{p: p}
176 if _, err := c.span(b.src, true); err != nil {
177 return nil, err
178 }
179 if p.disallow != nil {
180 for _, c := range b.src {
181 if p.disallow.Contains(rune(c)) {
182 return nil, errDisallowedRune
183 }
184 }
185 }
186 if p.options.disallowEmpty && len(b.src) == 0 {
187 return nil, errEmptyString
188 }
189 return b.src, nil
190 }
191
192
193
194
195 r := 1
196 if p.options.repeat {
197 r = 4
198 }
199 for ; r > 0; r-- {
200
201 if p.options.foldWidth || (p.options.ignorecase && comparing) {
202 b.apply(foldWidthT)
203 }
204 for _, f := range p.options.additional {
205 if err = b.apply(f()); err != nil {
206 return nil, err
207 }
208 }
209 if p.options.cases != nil {
210 b.apply(p.options.cases)
211 }
212 if comparing && p.options.ignorecase {
213 b.apply(lowerCaseT)
214 }
215 b.apply(p.norm)
216 if p.options.bidiRule && !bidirule.Valid(b.src) {
217 return nil, bidirule.ErrInvalid
218 }
219 c := checker{p: p}
220 if _, err := c.span(b.src, true); err != nil {
221 return nil, err
222 }
223 if p.disallow != nil {
224 for i := 0; i < len(b.src); {
225 r, size := utf8.DecodeRune(b.src[i:])
226 if p.disallow.Contains(r) {
227 return nil, errDisallowedRune
228 }
229 i += size
230 }
231 }
232 if p.options.disallowEmpty && len(b.src) == 0 {
233 return nil, errEmptyString
234 }
235 }
236 return b.src, nil
237 }
238
239
240
241 func (p *Profile) Append(dst, src []byte) ([]byte, error) {
242 var buf buffers
243 b, err := buf.enforce(p, src, false)
244 if err != nil {
245 return nil, err
246 }
247 return append(dst, b...), nil
248 }
249
250 func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
251 var buf buffers
252 b, err := buf.enforce(p, b, key)
253 if err != nil {
254 return nil, err
255 }
256 if buf.next == 0 {
257 c := make([]byte, len(b))
258 copy(c, b)
259 return c, nil
260 }
261 return b, nil
262 }
263
264
265 func (p *Profile) Bytes(b []byte) ([]byte, error) {
266 return processBytes(p, b, false)
267 }
268
269
270
271
272
273 func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
274 var buf buffers
275 b, err := buf.enforce(p, src, true)
276 if err != nil {
277 return nil, err
278 }
279 return append(dst, b...), nil
280 }
281
282 func processString(p *Profile, s string, key bool) (string, error) {
283 var buf buffers
284 b, err := buf.enforce(p, []byte(s), key)
285 if err != nil {
286 return "", err
287 }
288 return string(b), nil
289 }
290
291
292 func (p *Profile) String(s string) (string, error) {
293 return processString(p, s, false)
294 }
295
296
297
298 func (p *Profile) CompareKey(s string) (string, error) {
299 return processString(p, s, true)
300 }
301
302
303
304
305 func (p *Profile) Compare(a, b string) bool {
306 var buf buffers
307
308 akey, err := buf.enforce(p, []byte(a), true)
309 if err != nil {
310 return false
311 }
312
313 buf = buffers{}
314 bkey, err := buf.enforce(p, []byte(b), true)
315 if err != nil {
316 return false
317 }
318
319 return bytes.Equal(akey, bkey)
320 }
321
322
323
324
325 func (p *Profile) Allowed() runes.Set {
326 if p.options.disallow != nil {
327 return runes.Predicate(func(r rune) bool {
328 return p.class.Contains(r) && !p.options.disallow.Contains(r)
329 })
330 }
331 return p.class
332 }
333
334 type checker struct {
335 p *Profile
336 allowed runes.Set
337
338 beforeBits catBitmap
339 termBits catBitmap
340 acceptBits catBitmap
341 }
342
343 func (c *checker) Reset() {
344 c.beforeBits = 0
345 c.termBits = 0
346 c.acceptBits = 0
347 }
348
349 func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
350 for n < len(src) {
351 e, sz := dpTrie.lookup(src[n:])
352 d := categoryTransitions[category(e&catMask)]
353 if sz == 0 {
354 if !atEOF {
355 return n, transform.ErrShortSrc
356 }
357 return n, errDisallowedRune
358 }
359 doLookAhead := false
360 if property(e) < c.p.class.validFrom {
361 if d.rule == nil {
362 return n, errDisallowedRune
363 }
364 doLookAhead, err = d.rule(c.beforeBits)
365 if err != nil {
366 return n, err
367 }
368 }
369 c.beforeBits &= d.keep
370 c.beforeBits |= d.set
371 if c.termBits != 0 {
372
373 if c.beforeBits&c.termBits != 0 {
374 c.termBits = 0
375 c.acceptBits = 0
376 } else if c.beforeBits&c.acceptBits == 0 {
377
378 return n, errContext
379 }
380 }
381 if doLookAhead {
382 if c.termBits != 0 {
383
384 return n, errContext
385 }
386 c.termBits = d.term
387 c.acceptBits = d.accept
388 }
389 n += sz
390 }
391 if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
392 err = errContext
393 }
394 return n, err
395 }
396
397
398
399 func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
400 short := false
401 if len(dst) < len(src) {
402 src = src[:len(dst)]
403 atEOF = false
404 short = true
405 }
406 nSrc, err = c.span(src, atEOF)
407 nDst = copy(dst, src[:nSrc])
408 if short && (err == transform.ErrShortSrc || err == nil) {
409 err = transform.ErrShortDst
410 }
411 return nDst, nSrc, err
412 }
413
View as plain text