1
2
3
4
5 package syntax
6
7 import (
8 "sort"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15
16 type Error struct {
17 Code ErrorCode
18 Expr string
19 }
20
21 func (e *Error) Error() string {
22 return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
23 }
24
25
26 type ErrorCode string
27
28 const (
29
30 ErrInternalError ErrorCode = "regexp/syntax: internal error"
31
32
33 ErrInvalidCharClass ErrorCode = "invalid character class"
34 ErrInvalidCharRange ErrorCode = "invalid character class range"
35 ErrInvalidEscape ErrorCode = "invalid escape sequence"
36 ErrInvalidNamedCapture ErrorCode = "invalid named capture"
37 ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
38 ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
39 ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
40 ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
41 ErrMissingBracket ErrorCode = "missing closing ]"
42 ErrMissingParen ErrorCode = "missing closing )"
43 ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
44 ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
45 ErrUnexpectedParen ErrorCode = "unexpected )"
46 ErrNestingDepth ErrorCode = "expression nests too deeply"
47 ErrLarge ErrorCode = "expression too large"
48 )
49
50 func (e ErrorCode) String() string {
51 return string(e)
52 }
53
54
55 type Flags uint16
56
57 const (
58 FoldCase Flags = 1 << iota
59 Literal
60 ClassNL
61 DotNL
62 OneLine
63 NonGreedy
64 PerlX
65 UnicodeGroups
66 WasDollar
67 Simple
68
69 MatchNL = ClassNL | DotNL
70
71 Perl = ClassNL | OneLine | PerlX | UnicodeGroups
72 POSIX Flags = 0
73 )
74
75
76 const (
77 opLeftParen = opPseudo + iota
78 opVerticalBar
79 )
80
81
82
83
84
85
86
87
88
89
90
91
92
93 const maxHeight = 1000
94
95
96
97
98
99
100
101 const (
102 maxSize = 128 << 20 / instSize
103 instSize = 5 * 8
104 )
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 const (
122 maxRunes = 128 << 20 / runeSize
123 runeSize = 4
124 )
125
126 type parser struct {
127 flags Flags
128 stack []*Regexp
129 free *Regexp
130 numCap int
131 wholeRegexp string
132 tmpClass []rune
133 numRegexp int
134 numRunes int
135 repeats int64
136 height map[*Regexp]int
137 size map[*Regexp]int64
138 }
139
140 func (p *parser) newRegexp(op Op) *Regexp {
141 re := p.free
142 if re != nil {
143 p.free = re.Sub0[0]
144 *re = Regexp{}
145 } else {
146 re = new(Regexp)
147 p.numRegexp++
148 }
149 re.Op = op
150 return re
151 }
152
153 func (p *parser) reuse(re *Regexp) {
154 if p.height != nil {
155 delete(p.height, re)
156 }
157 re.Sub0[0] = p.free
158 p.free = re
159 }
160
161 func (p *parser) checkLimits(re *Regexp) {
162 if p.numRunes > maxRunes {
163 panic(ErrLarge)
164 }
165 p.checkSize(re)
166 p.checkHeight(re)
167 }
168
169 func (p *parser) checkSize(re *Regexp) {
170 if p.size == nil {
171
172
173
174
175
176 if p.repeats == 0 {
177 p.repeats = 1
178 }
179 if re.Op == OpRepeat {
180 n := re.Max
181 if n == -1 {
182 n = re.Min
183 }
184 if n <= 0 {
185 n = 1
186 }
187 if int64(n) > maxSize/p.repeats {
188 p.repeats = maxSize
189 } else {
190 p.repeats *= int64(n)
191 }
192 }
193 if int64(p.numRegexp) < maxSize/p.repeats {
194 return
195 }
196
197
198
199
200 p.size = make(map[*Regexp]int64)
201 for _, re := range p.stack {
202 p.checkSize(re)
203 }
204 }
205
206 if p.calcSize(re, true) > maxSize {
207 panic(ErrLarge)
208 }
209 }
210
211 func (p *parser) calcSize(re *Regexp, force bool) int64 {
212 if !force {
213 if size, ok := p.size[re]; ok {
214 return size
215 }
216 }
217
218 var size int64
219 switch re.Op {
220 case OpLiteral:
221 size = int64(len(re.Rune))
222 case OpCapture, OpStar:
223
224 size = 2 + p.calcSize(re.Sub[0], false)
225 case OpPlus, OpQuest:
226 size = 1 + p.calcSize(re.Sub[0], false)
227 case OpConcat:
228 for _, sub := range re.Sub {
229 size += p.calcSize(sub, false)
230 }
231 case OpAlternate:
232 for _, sub := range re.Sub {
233 size += p.calcSize(sub, false)
234 }
235 if len(re.Sub) > 1 {
236 size += int64(len(re.Sub)) - 1
237 }
238 case OpRepeat:
239 sub := p.calcSize(re.Sub[0], false)
240 if re.Max == -1 {
241 if re.Min == 0 {
242 size = 2 + sub
243 } else {
244 size = 1 + int64(re.Min)*sub
245 }
246 break
247 }
248
249 size = int64(re.Max)*sub + int64(re.Max-re.Min)
250 }
251
252 if size < 1 {
253 size = 1
254 }
255 p.size[re] = size
256 return size
257 }
258
259 func (p *parser) checkHeight(re *Regexp) {
260 if p.numRegexp < maxHeight {
261 return
262 }
263 if p.height == nil {
264 p.height = make(map[*Regexp]int)
265 for _, re := range p.stack {
266 p.checkHeight(re)
267 }
268 }
269 if p.calcHeight(re, true) > maxHeight {
270 panic(ErrNestingDepth)
271 }
272 }
273
274 func (p *parser) calcHeight(re *Regexp, force bool) int {
275 if !force {
276 if h, ok := p.height[re]; ok {
277 return h
278 }
279 }
280 h := 1
281 for _, sub := range re.Sub {
282 hsub := p.calcHeight(sub, false)
283 if h < 1+hsub {
284 h = 1 + hsub
285 }
286 }
287 p.height[re] = h
288 return h
289 }
290
291
292
293
294 func (p *parser) push(re *Regexp) *Regexp {
295 p.numRunes += len(re.Rune)
296 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
297
298 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
299 return nil
300 }
301 re.Op = OpLiteral
302 re.Rune = re.Rune[:1]
303 re.Flags = p.flags &^ FoldCase
304 } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
305 re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
306 unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
307 unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
308 re.Op == OpCharClass && len(re.Rune) == 2 &&
309 re.Rune[0]+1 == re.Rune[1] &&
310 unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
311 unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
312
313 if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
314 return nil
315 }
316
317
318 re.Op = OpLiteral
319 re.Rune = re.Rune[:1]
320 re.Flags = p.flags | FoldCase
321 } else {
322
323 p.maybeConcat(-1, 0)
324 }
325
326 p.stack = append(p.stack, re)
327 p.checkLimits(re)
328 return re
329 }
330
331
332
333
334
335
336
337
338
339
340 func (p *parser) maybeConcat(r rune, flags Flags) bool {
341 n := len(p.stack)
342 if n < 2 {
343 return false
344 }
345
346 re1 := p.stack[n-1]
347 re2 := p.stack[n-2]
348 if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
349 return false
350 }
351
352
353 re2.Rune = append(re2.Rune, re1.Rune...)
354
355
356 if r >= 0 {
357 re1.Rune = re1.Rune0[:1]
358 re1.Rune[0] = r
359 re1.Flags = flags
360 return true
361 }
362
363 p.stack = p.stack[:n-1]
364 p.reuse(re1)
365 return false
366 }
367
368
369 func (p *parser) literal(r rune) {
370 re := p.newRegexp(OpLiteral)
371 re.Flags = p.flags
372 if p.flags&FoldCase != 0 {
373 r = minFoldRune(r)
374 }
375 re.Rune0[0] = r
376 re.Rune = re.Rune0[:1]
377 p.push(re)
378 }
379
380
381 func minFoldRune(r rune) rune {
382 if r < minFold || r > maxFold {
383 return r
384 }
385 m := r
386 r0 := r
387 for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
388 m = min(m, r)
389 }
390 return m
391 }
392
393
394
395 func (p *parser) op(op Op) *Regexp {
396 re := p.newRegexp(op)
397 re.Flags = p.flags
398 return p.push(re)
399 }
400
401
402
403
404
405 func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
406 flags := p.flags
407 if p.flags&PerlX != 0 {
408 if len(after) > 0 && after[0] == '?' {
409 after = after[1:]
410 flags ^= NonGreedy
411 }
412 if lastRepeat != "" {
413
414
415
416 return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
417 }
418 }
419 n := len(p.stack)
420 if n == 0 {
421 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
422 }
423 sub := p.stack[n-1]
424 if sub.Op >= opPseudo {
425 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
426 }
427
428 re := p.newRegexp(op)
429 re.Min = min
430 re.Max = max
431 re.Flags = flags
432 re.Sub = re.Sub0[:1]
433 re.Sub[0] = sub
434 p.stack[n-1] = re
435 p.checkLimits(re)
436
437 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
438 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
439 }
440
441 return after, nil
442 }
443
444
445
446
447
448
449
450
451
452
453 func repeatIsValid(re *Regexp, n int) bool {
454 if re.Op == OpRepeat {
455 m := re.Max
456 if m == 0 {
457 return true
458 }
459 if m < 0 {
460 m = re.Min
461 }
462 if m > n {
463 return false
464 }
465 if m > 0 {
466 n /= m
467 }
468 }
469 for _, sub := range re.Sub {
470 if !repeatIsValid(sub, n) {
471 return false
472 }
473 }
474 return true
475 }
476
477
478 func (p *parser) concat() *Regexp {
479 p.maybeConcat(-1, 0)
480
481
482 i := len(p.stack)
483 for i > 0 && p.stack[i-1].Op < opPseudo {
484 i--
485 }
486 subs := p.stack[i:]
487 p.stack = p.stack[:i]
488
489
490 if len(subs) == 0 {
491 return p.push(p.newRegexp(OpEmptyMatch))
492 }
493
494 return p.push(p.collapse(subs, OpConcat))
495 }
496
497
498 func (p *parser) alternate() *Regexp {
499
500
501 i := len(p.stack)
502 for i > 0 && p.stack[i-1].Op < opPseudo {
503 i--
504 }
505 subs := p.stack[i:]
506 p.stack = p.stack[:i]
507
508
509
510 if len(subs) > 0 {
511 cleanAlt(subs[len(subs)-1])
512 }
513
514
515
516 if len(subs) == 0 {
517 return p.push(p.newRegexp(OpNoMatch))
518 }
519
520 return p.push(p.collapse(subs, OpAlternate))
521 }
522
523
524 func cleanAlt(re *Regexp) {
525 switch re.Op {
526 case OpCharClass:
527 re.Rune = cleanClass(&re.Rune)
528 if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
529 re.Rune = nil
530 re.Op = OpAnyChar
531 return
532 }
533 if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
534 re.Rune = nil
535 re.Op = OpAnyCharNotNL
536 return
537 }
538 if cap(re.Rune)-len(re.Rune) > 100 {
539
540
541 re.Rune = append(re.Rune0[:0], re.Rune...)
542 }
543 }
544 }
545
546
547
548
549
550 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
551 if len(subs) == 1 {
552 return subs[0]
553 }
554 re := p.newRegexp(op)
555 re.Sub = re.Sub0[:0]
556 for _, sub := range subs {
557 if sub.Op == op {
558 re.Sub = append(re.Sub, sub.Sub...)
559 p.reuse(sub)
560 } else {
561 re.Sub = append(re.Sub, sub)
562 }
563 }
564 if op == OpAlternate {
565 re.Sub = p.factor(re.Sub)
566 if len(re.Sub) == 1 {
567 old := re
568 re = re.Sub[0]
569 p.reuse(old)
570 }
571 }
572 return re
573 }
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590 func (p *parser) factor(sub []*Regexp) []*Regexp {
591 if len(sub) < 2 {
592 return sub
593 }
594
595
596 var str []rune
597 var strflags Flags
598 start := 0
599 out := sub[:0]
600 for i := 0; i <= len(sub); i++ {
601
602
603
604
605
606
607 var istr []rune
608 var iflags Flags
609 if i < len(sub) {
610 istr, iflags = p.leadingString(sub[i])
611 if iflags == strflags {
612 same := 0
613 for same < len(str) && same < len(istr) && str[same] == istr[same] {
614 same++
615 }
616 if same > 0 {
617
618
619 str = str[:same]
620 continue
621 }
622 }
623 }
624
625
626
627
628
629
630 if i == start {
631
632 } else if i == start+1 {
633
634 out = append(out, sub[start])
635 } else {
636
637 prefix := p.newRegexp(OpLiteral)
638 prefix.Flags = strflags
639 prefix.Rune = append(prefix.Rune[:0], str...)
640
641 for j := start; j < i; j++ {
642 sub[j] = p.removeLeadingString(sub[j], len(str))
643 p.checkLimits(sub[j])
644 }
645 suffix := p.collapse(sub[start:i], OpAlternate)
646
647 re := p.newRegexp(OpConcat)
648 re.Sub = append(re.Sub[:0], prefix, suffix)
649 out = append(out, re)
650 }
651
652
653 start = i
654 str = istr
655 strflags = iflags
656 }
657 sub = out
658
659
660
661
662
663
664
665
666
667 start = 0
668 out = sub[:0]
669 var first *Regexp
670 for i := 0; i <= len(sub); i++ {
671
672
673
674
675
676 var ifirst *Regexp
677 if i < len(sub) {
678 ifirst = p.leadingRegexp(sub[i])
679 if first != nil && first.Equal(ifirst) &&
680
681 (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
682 continue
683 }
684 }
685
686
687
688
689
690 if i == start {
691
692 } else if i == start+1 {
693
694 out = append(out, sub[start])
695 } else {
696
697 prefix := first
698 for j := start; j < i; j++ {
699 reuse := j != start
700 sub[j] = p.removeLeadingRegexp(sub[j], reuse)
701 p.checkLimits(sub[j])
702 }
703 suffix := p.collapse(sub[start:i], OpAlternate)
704
705 re := p.newRegexp(OpConcat)
706 re.Sub = append(re.Sub[:0], prefix, suffix)
707 out = append(out, re)
708 }
709
710
711 start = i
712 first = ifirst
713 }
714 sub = out
715
716
717 start = 0
718 out = sub[:0]
719 for i := 0; i <= len(sub); i++ {
720
721
722
723
724
725
726 if i < len(sub) && isCharClass(sub[i]) {
727 continue
728 }
729
730
731
732 if i == start {
733
734 } else if i == start+1 {
735 out = append(out, sub[start])
736 } else {
737
738
739 max := start
740 for j := start + 1; j < i; j++ {
741 if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
742 max = j
743 }
744 }
745 sub[start], sub[max] = sub[max], sub[start]
746
747 for j := start + 1; j < i; j++ {
748 mergeCharClass(sub[start], sub[j])
749 p.reuse(sub[j])
750 }
751 cleanAlt(sub[start])
752 out = append(out, sub[start])
753 }
754
755
756 if i < len(sub) {
757 out = append(out, sub[i])
758 }
759 start = i + 1
760 }
761 sub = out
762
763
764 start = 0
765 out = sub[:0]
766 for i := range sub {
767 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
768 continue
769 }
770 out = append(out, sub[i])
771 }
772 sub = out
773
774 return sub
775 }
776
777
778
779 func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
780 if re.Op == OpConcat && len(re.Sub) > 0 {
781 re = re.Sub[0]
782 }
783 if re.Op != OpLiteral {
784 return nil, 0
785 }
786 return re.Rune, re.Flags & FoldCase
787 }
788
789
790
791 func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
792 if re.Op == OpConcat && len(re.Sub) > 0 {
793
794
795 sub := re.Sub[0]
796 sub = p.removeLeadingString(sub, n)
797 re.Sub[0] = sub
798 if sub.Op == OpEmptyMatch {
799 p.reuse(sub)
800 switch len(re.Sub) {
801 case 0, 1:
802
803 re.Op = OpEmptyMatch
804 re.Sub = nil
805 case 2:
806 old := re
807 re = re.Sub[1]
808 p.reuse(old)
809 default:
810 copy(re.Sub, re.Sub[1:])
811 re.Sub = re.Sub[:len(re.Sub)-1]
812 }
813 }
814 return re
815 }
816
817 if re.Op == OpLiteral {
818 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
819 if len(re.Rune) == 0 {
820 re.Op = OpEmptyMatch
821 }
822 }
823 return re
824 }
825
826
827
828 func (p *parser) leadingRegexp(re *Regexp) *Regexp {
829 if re.Op == OpEmptyMatch {
830 return nil
831 }
832 if re.Op == OpConcat && len(re.Sub) > 0 {
833 sub := re.Sub[0]
834 if sub.Op == OpEmptyMatch {
835 return nil
836 }
837 return sub
838 }
839 return re
840 }
841
842
843
844
845 func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
846 if re.Op == OpConcat && len(re.Sub) > 0 {
847 if reuse {
848 p.reuse(re.Sub[0])
849 }
850 re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
851 switch len(re.Sub) {
852 case 0:
853 re.Op = OpEmptyMatch
854 re.Sub = nil
855 case 1:
856 old := re
857 re = re.Sub[0]
858 p.reuse(old)
859 }
860 return re
861 }
862 if reuse {
863 p.reuse(re)
864 }
865 return p.newRegexp(OpEmptyMatch)
866 }
867
868 func literalRegexp(s string, flags Flags) *Regexp {
869 re := &Regexp{Op: OpLiteral}
870 re.Flags = flags
871 re.Rune = re.Rune0[:0]
872 for _, c := range s {
873 if len(re.Rune) >= cap(re.Rune) {
874
875 re.Rune = []rune(s)
876 break
877 }
878 re.Rune = append(re.Rune, c)
879 }
880 return re
881 }
882
883
884
885
886
887
888 func Parse(s string, flags Flags) (*Regexp, error) {
889 return parse(s, flags)
890 }
891
892 func parse(s string, flags Flags) (_ *Regexp, err error) {
893 defer func() {
894 switch r := recover(); r {
895 default:
896 panic(r)
897 case nil:
898
899 case ErrLarge:
900 err = &Error{Code: ErrLarge, Expr: s}
901 case ErrNestingDepth:
902 err = &Error{Code: ErrNestingDepth, Expr: s}
903 }
904 }()
905
906 if flags&Literal != 0 {
907
908 if err := checkUTF8(s); err != nil {
909 return nil, err
910 }
911 return literalRegexp(s, flags), nil
912 }
913
914
915 var (
916 p parser
917 c rune
918 op Op
919 lastRepeat string
920 )
921 p.flags = flags
922 p.wholeRegexp = s
923 t := s
924 for t != "" {
925 repeat := ""
926 BigSwitch:
927 switch t[0] {
928 default:
929 if c, t, err = nextRune(t); err != nil {
930 return nil, err
931 }
932 p.literal(c)
933
934 case '(':
935 if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
936
937 if t, err = p.parsePerlFlags(t); err != nil {
938 return nil, err
939 }
940 break
941 }
942 p.numCap++
943 p.op(opLeftParen).Cap = p.numCap
944 t = t[1:]
945 case '|':
946 if err = p.parseVerticalBar(); err != nil {
947 return nil, err
948 }
949 t = t[1:]
950 case ')':
951 if err = p.parseRightParen(); err != nil {
952 return nil, err
953 }
954 t = t[1:]
955 case '^':
956 if p.flags&OneLine != 0 {
957 p.op(OpBeginText)
958 } else {
959 p.op(OpBeginLine)
960 }
961 t = t[1:]
962 case '$':
963 if p.flags&OneLine != 0 {
964 p.op(OpEndText).Flags |= WasDollar
965 } else {
966 p.op(OpEndLine)
967 }
968 t = t[1:]
969 case '.':
970 if p.flags&DotNL != 0 {
971 p.op(OpAnyChar)
972 } else {
973 p.op(OpAnyCharNotNL)
974 }
975 t = t[1:]
976 case '[':
977 if t, err = p.parseClass(t); err != nil {
978 return nil, err
979 }
980 case '*', '+', '?':
981 before := t
982 switch t[0] {
983 case '*':
984 op = OpStar
985 case '+':
986 op = OpPlus
987 case '?':
988 op = OpQuest
989 }
990 after := t[1:]
991 if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
992 return nil, err
993 }
994 repeat = before
995 t = after
996 case '{':
997 op = OpRepeat
998 before := t
999 min, max, after, ok := p.parseRepeat(t)
1000 if !ok {
1001
1002 p.literal('{')
1003 t = t[1:]
1004 break
1005 }
1006 if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
1007
1008 return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
1009 }
1010 if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
1011 return nil, err
1012 }
1013 repeat = before
1014 t = after
1015 case '\\':
1016 if p.flags&PerlX != 0 && len(t) >= 2 {
1017 switch t[1] {
1018 case 'A':
1019 p.op(OpBeginText)
1020 t = t[2:]
1021 break BigSwitch
1022 case 'b':
1023 p.op(OpWordBoundary)
1024 t = t[2:]
1025 break BigSwitch
1026 case 'B':
1027 p.op(OpNoWordBoundary)
1028 t = t[2:]
1029 break BigSwitch
1030 case 'C':
1031
1032 return nil, &Error{ErrInvalidEscape, t[:2]}
1033 case 'Q':
1034
1035 var lit string
1036 lit, t, _ = strings.Cut(t[2:], `\E`)
1037 for lit != "" {
1038 c, rest, err := nextRune(lit)
1039 if err != nil {
1040 return nil, err
1041 }
1042 p.literal(c)
1043 lit = rest
1044 }
1045 break BigSwitch
1046 case 'z':
1047 p.op(OpEndText)
1048 t = t[2:]
1049 break BigSwitch
1050 }
1051 }
1052
1053 re := p.newRegexp(OpCharClass)
1054 re.Flags = p.flags
1055
1056
1057 if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
1058 r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
1059 if err != nil {
1060 return nil, err
1061 }
1062 if r != nil {
1063 re.Rune = r
1064 t = rest
1065 p.push(re)
1066 break BigSwitch
1067 }
1068 }
1069
1070
1071 if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
1072 re.Rune = r
1073 t = rest
1074 p.push(re)
1075 break BigSwitch
1076 }
1077 p.reuse(re)
1078
1079
1080 if c, t, err = p.parseEscape(t); err != nil {
1081 return nil, err
1082 }
1083 p.literal(c)
1084 }
1085 lastRepeat = repeat
1086 }
1087
1088 p.concat()
1089 if p.swapVerticalBar() {
1090
1091 p.stack = p.stack[:len(p.stack)-1]
1092 }
1093 p.alternate()
1094
1095 n := len(p.stack)
1096 if n != 1 {
1097 return nil, &Error{ErrMissingParen, s}
1098 }
1099 return p.stack[0], nil
1100 }
1101
1102
1103
1104
1105 func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
1106 if s == "" || s[0] != '{' {
1107 return
1108 }
1109 s = s[1:]
1110 var ok1 bool
1111 if min, s, ok1 = p.parseInt(s); !ok1 {
1112 return
1113 }
1114 if s == "" {
1115 return
1116 }
1117 if s[0] != ',' {
1118 max = min
1119 } else {
1120 s = s[1:]
1121 if s == "" {
1122 return
1123 }
1124 if s[0] == '}' {
1125 max = -1
1126 } else if max, s, ok1 = p.parseInt(s); !ok1 {
1127 return
1128 } else if max < 0 {
1129
1130 min = -1
1131 }
1132 }
1133 if s == "" || s[0] != '}' {
1134 return
1135 }
1136 rest = s[1:]
1137 ok = true
1138 return
1139 }
1140
1141
1142
1143
1144 func (p *parser) parsePerlFlags(s string) (rest string, err error) {
1145 t := s
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162 startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<'
1163 startsWithName := len(t) > 3 && t[2] == '<'
1164
1165 if startsWithP || startsWithName {
1166
1167 exprStartPos := 4
1168 if startsWithName {
1169 exprStartPos = 3
1170 }
1171
1172
1173 end := strings.IndexRune(t, '>')
1174 if end < 0 {
1175 if err = checkUTF8(t); err != nil {
1176 return "", err
1177 }
1178 return "", &Error{ErrInvalidNamedCapture, s}
1179 }
1180
1181 capture := t[:end+1]
1182 name := t[exprStartPos:end]
1183 if err = checkUTF8(name); err != nil {
1184 return "", err
1185 }
1186 if !isValidCaptureName(name) {
1187 return "", &Error{ErrInvalidNamedCapture, capture}
1188 }
1189
1190
1191 p.numCap++
1192 re := p.op(opLeftParen)
1193 re.Cap = p.numCap
1194 re.Name = name
1195 return t[end+1:], nil
1196 }
1197
1198
1199 var c rune
1200 t = t[2:]
1201 flags := p.flags
1202 sign := +1
1203 sawFlag := false
1204 Loop:
1205 for t != "" {
1206 if c, t, err = nextRune(t); err != nil {
1207 return "", err
1208 }
1209 switch c {
1210 default:
1211 break Loop
1212
1213
1214 case 'i':
1215 flags |= FoldCase
1216 sawFlag = true
1217 case 'm':
1218 flags &^= OneLine
1219 sawFlag = true
1220 case 's':
1221 flags |= DotNL
1222 sawFlag = true
1223 case 'U':
1224 flags |= NonGreedy
1225 sawFlag = true
1226
1227
1228 case '-':
1229 if sign < 0 {
1230 break Loop
1231 }
1232 sign = -1
1233
1234
1235 flags = ^flags
1236 sawFlag = false
1237
1238
1239 case ':', ')':
1240 if sign < 0 {
1241 if !sawFlag {
1242 break Loop
1243 }
1244 flags = ^flags
1245 }
1246 if c == ':' {
1247
1248 p.op(opLeftParen)
1249 }
1250 p.flags = flags
1251 return t, nil
1252 }
1253 }
1254
1255 return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
1256 }
1257
1258
1259
1260
1261
1262
1263 func isValidCaptureName(name string) bool {
1264 if name == "" {
1265 return false
1266 }
1267 for _, c := range name {
1268 if c != '_' && !isalnum(c) {
1269 return false
1270 }
1271 }
1272 return true
1273 }
1274
1275
1276 func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
1277 if s == "" || s[0] < '0' || '9' < s[0] {
1278 return
1279 }
1280
1281 if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
1282 return
1283 }
1284 t := s
1285 for s != "" && '0' <= s[0] && s[0] <= '9' {
1286 s = s[1:]
1287 }
1288 rest = s
1289 ok = true
1290
1291 t = t[:len(t)-len(s)]
1292 for i := 0; i < len(t); i++ {
1293
1294 if n >= 1e8 {
1295 n = -1
1296 break
1297 }
1298 n = n*10 + int(t[i]) - '0'
1299 }
1300 return
1301 }
1302
1303
1304
1305 func isCharClass(re *Regexp) bool {
1306 return re.Op == OpLiteral && len(re.Rune) == 1 ||
1307 re.Op == OpCharClass ||
1308 re.Op == OpAnyCharNotNL ||
1309 re.Op == OpAnyChar
1310 }
1311
1312
1313 func matchRune(re *Regexp, r rune) bool {
1314 switch re.Op {
1315 case OpLiteral:
1316 return len(re.Rune) == 1 && re.Rune[0] == r
1317 case OpCharClass:
1318 for i := 0; i < len(re.Rune); i += 2 {
1319 if re.Rune[i] <= r && r <= re.Rune[i+1] {
1320 return true
1321 }
1322 }
1323 return false
1324 case OpAnyCharNotNL:
1325 return r != '\n'
1326 case OpAnyChar:
1327 return true
1328 }
1329 return false
1330 }
1331
1332
1333 func (p *parser) parseVerticalBar() error {
1334 p.concat()
1335
1336
1337
1338
1339
1340 if !p.swapVerticalBar() {
1341 p.op(opVerticalBar)
1342 }
1343
1344 return nil
1345 }
1346
1347
1348
1349
1350 func mergeCharClass(dst, src *Regexp) {
1351 switch dst.Op {
1352 case OpAnyChar:
1353
1354 case OpAnyCharNotNL:
1355
1356 if matchRune(src, '\n') {
1357 dst.Op = OpAnyChar
1358 }
1359 case OpCharClass:
1360
1361 if src.Op == OpLiteral {
1362 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1363 } else {
1364 dst.Rune = appendClass(dst.Rune, src.Rune)
1365 }
1366 case OpLiteral:
1367
1368 if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
1369 break
1370 }
1371 dst.Op = OpCharClass
1372 dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
1373 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1374 }
1375 }
1376
1377
1378
1379
1380 func (p *parser) swapVerticalBar() bool {
1381
1382
1383 n := len(p.stack)
1384 if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
1385 re1 := p.stack[n-1]
1386 re3 := p.stack[n-3]
1387
1388 if re1.Op > re3.Op {
1389 re1, re3 = re3, re1
1390 p.stack[n-3] = re3
1391 }
1392 mergeCharClass(re3, re1)
1393 p.reuse(re1)
1394 p.stack = p.stack[:n-1]
1395 return true
1396 }
1397
1398 if n >= 2 {
1399 re1 := p.stack[n-1]
1400 re2 := p.stack[n-2]
1401 if re2.Op == opVerticalBar {
1402 if n >= 3 {
1403
1404
1405 cleanAlt(p.stack[n-3])
1406 }
1407 p.stack[n-2] = re1
1408 p.stack[n-1] = re2
1409 return true
1410 }
1411 }
1412 return false
1413 }
1414
1415
1416 func (p *parser) parseRightParen() error {
1417 p.concat()
1418 if p.swapVerticalBar() {
1419
1420 p.stack = p.stack[:len(p.stack)-1]
1421 }
1422 p.alternate()
1423
1424 n := len(p.stack)
1425 if n < 2 {
1426 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1427 }
1428 re1 := p.stack[n-1]
1429 re2 := p.stack[n-2]
1430 p.stack = p.stack[:n-2]
1431 if re2.Op != opLeftParen {
1432 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1433 }
1434
1435 p.flags = re2.Flags
1436 if re2.Cap == 0 {
1437
1438 p.push(re1)
1439 } else {
1440 re2.Op = OpCapture
1441 re2.Sub = re2.Sub0[:1]
1442 re2.Sub[0] = re1
1443 p.push(re2)
1444 }
1445 return nil
1446 }
1447
1448
1449
1450 func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
1451 t := s[1:]
1452 if t == "" {
1453 return 0, "", &Error{ErrTrailingBackslash, ""}
1454 }
1455 c, t, err := nextRune(t)
1456 if err != nil {
1457 return 0, "", err
1458 }
1459
1460 Switch:
1461 switch c {
1462 default:
1463 if c < utf8.RuneSelf && !isalnum(c) {
1464
1465
1466
1467
1468 return c, t, nil
1469 }
1470
1471
1472 case '1', '2', '3', '4', '5', '6', '7':
1473
1474 if t == "" || t[0] < '0' || t[0] > '7' {
1475 break
1476 }
1477 fallthrough
1478 case '0':
1479
1480 r = c - '0'
1481 for i := 1; i < 3; i++ {
1482 if t == "" || t[0] < '0' || t[0] > '7' {
1483 break
1484 }
1485 r = r*8 + rune(t[0]) - '0'
1486 t = t[1:]
1487 }
1488 return r, t, nil
1489
1490
1491 case 'x':
1492 if t == "" {
1493 break
1494 }
1495 if c, t, err = nextRune(t); err != nil {
1496 return 0, "", err
1497 }
1498 if c == '{' {
1499
1500
1501
1502
1503 nhex := 0
1504 r = 0
1505 for {
1506 if t == "" {
1507 break Switch
1508 }
1509 if c, t, err = nextRune(t); err != nil {
1510 return 0, "", err
1511 }
1512 if c == '}' {
1513 break
1514 }
1515 v := unhex(c)
1516 if v < 0 {
1517 break Switch
1518 }
1519 r = r*16 + v
1520 if r > unicode.MaxRune {
1521 break Switch
1522 }
1523 nhex++
1524 }
1525 if nhex == 0 {
1526 break Switch
1527 }
1528 return r, t, nil
1529 }
1530
1531
1532 x := unhex(c)
1533 if c, t, err = nextRune(t); err != nil {
1534 return 0, "", err
1535 }
1536 y := unhex(c)
1537 if x < 0 || y < 0 {
1538 break
1539 }
1540 return x*16 + y, t, nil
1541
1542
1543
1544
1545
1546
1547
1548 case 'a':
1549 return '\a', t, err
1550 case 'f':
1551 return '\f', t, err
1552 case 'n':
1553 return '\n', t, err
1554 case 'r':
1555 return '\r', t, err
1556 case 't':
1557 return '\t', t, err
1558 case 'v':
1559 return '\v', t, err
1560 }
1561 return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
1562 }
1563
1564
1565
1566 func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
1567 if s == "" {
1568 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
1569 }
1570
1571
1572
1573 if s[0] == '\\' {
1574 return p.parseEscape(s)
1575 }
1576
1577 return nextRune(s)
1578 }
1579
1580 type charGroup struct {
1581 sign int
1582 class []rune
1583 }
1584
1585
1586
1587
1588 func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
1589 if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
1590 return
1591 }
1592 g := perlGroup[s[0:2]]
1593 if g.sign == 0 {
1594 return
1595 }
1596 return p.appendGroup(r, g), s[2:]
1597 }
1598
1599
1600
1601
1602 func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
1603 if len(s) < 2 || s[0] != '[' || s[1] != ':' {
1604 return
1605 }
1606
1607 i := strings.Index(s[2:], ":]")
1608 if i < 0 {
1609 return
1610 }
1611 i += 2
1612 name, s := s[0:i+2], s[i+2:]
1613 g := posixGroup[name]
1614 if g.sign == 0 {
1615 return nil, "", &Error{ErrInvalidCharRange, name}
1616 }
1617 return p.appendGroup(r, g), s, nil
1618 }
1619
1620 func (p *parser) appendGroup(r []rune, g charGroup) []rune {
1621 if p.flags&FoldCase == 0 {
1622 if g.sign < 0 {
1623 r = appendNegatedClass(r, g.class)
1624 } else {
1625 r = appendClass(r, g.class)
1626 }
1627 } else {
1628 tmp := p.tmpClass[:0]
1629 tmp = appendFoldedClass(tmp, g.class)
1630 p.tmpClass = tmp
1631 tmp = cleanClass(&p.tmpClass)
1632 if g.sign < 0 {
1633 r = appendNegatedClass(r, tmp)
1634 } else {
1635 r = appendClass(r, tmp)
1636 }
1637 }
1638 return r
1639 }
1640
1641 var anyTable = &unicode.RangeTable{
1642 R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
1643 R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
1644 }
1645
1646
1647
1648 func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
1649
1650 if name == "Any" {
1651 return anyTable, anyTable
1652 }
1653 if t := unicode.Categories[name]; t != nil {
1654 return t, unicode.FoldCategory[name]
1655 }
1656 if t := unicode.Scripts[name]; t != nil {
1657 return t, unicode.FoldScript[name]
1658 }
1659 return nil, nil
1660 }
1661
1662
1663
1664
1665 func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
1666 if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
1667 return
1668 }
1669
1670
1671 sign := +1
1672 if s[1] == 'P' {
1673 sign = -1
1674 }
1675 t := s[2:]
1676 c, t, err := nextRune(t)
1677 if err != nil {
1678 return
1679 }
1680 var seq, name string
1681 if c != '{' {
1682
1683 seq = s[:len(s)-len(t)]
1684 name = seq[2:]
1685 } else {
1686
1687 end := strings.IndexRune(s, '}')
1688 if end < 0 {
1689 if err = checkUTF8(s); err != nil {
1690 return
1691 }
1692 return nil, "", &Error{ErrInvalidCharRange, s}
1693 }
1694 seq, t = s[:end+1], s[end+1:]
1695 name = s[3:end]
1696 if err = checkUTF8(name); err != nil {
1697 return
1698 }
1699 }
1700
1701
1702 if name != "" && name[0] == '^' {
1703 sign = -sign
1704 name = name[1:]
1705 }
1706
1707 tab, fold := unicodeTable(name)
1708 if tab == nil {
1709 return nil, "", &Error{ErrInvalidCharRange, seq}
1710 }
1711
1712 if p.flags&FoldCase == 0 || fold == nil {
1713 if sign > 0 {
1714 r = appendTable(r, tab)
1715 } else {
1716 r = appendNegatedTable(r, tab)
1717 }
1718 } else {
1719
1720
1721
1722 tmp := p.tmpClass[:0]
1723 tmp = appendTable(tmp, tab)
1724 tmp = appendTable(tmp, fold)
1725 p.tmpClass = tmp
1726 tmp = cleanClass(&p.tmpClass)
1727 if sign > 0 {
1728 r = appendClass(r, tmp)
1729 } else {
1730 r = appendNegatedClass(r, tmp)
1731 }
1732 }
1733 return r, t, nil
1734 }
1735
1736
1737
1738 func (p *parser) parseClass(s string) (rest string, err error) {
1739 t := s[1:]
1740 re := p.newRegexp(OpCharClass)
1741 re.Flags = p.flags
1742 re.Rune = re.Rune0[:0]
1743
1744 sign := +1
1745 if t != "" && t[0] == '^' {
1746 sign = -1
1747 t = t[1:]
1748
1749
1750
1751 if p.flags&ClassNL == 0 {
1752 re.Rune = append(re.Rune, '\n', '\n')
1753 }
1754 }
1755
1756 class := re.Rune
1757 first := true
1758 for t == "" || t[0] != ']' || first {
1759
1760
1761 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
1762 _, size := utf8.DecodeRuneInString(t[1:])
1763 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
1764 }
1765 first = false
1766
1767
1768 if len(t) > 2 && t[0] == '[' && t[1] == ':' {
1769 nclass, nt, err := p.parseNamedClass(t, class)
1770 if err != nil {
1771 return "", err
1772 }
1773 if nclass != nil {
1774 class, t = nclass, nt
1775 continue
1776 }
1777 }
1778
1779
1780 nclass, nt, err := p.parseUnicodeClass(t, class)
1781 if err != nil {
1782 return "", err
1783 }
1784 if nclass != nil {
1785 class, t = nclass, nt
1786 continue
1787 }
1788
1789
1790 if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
1791 class, t = nclass, nt
1792 continue
1793 }
1794
1795
1796 rng := t
1797 var lo, hi rune
1798 if lo, t, err = p.parseClassChar(t, s); err != nil {
1799 return "", err
1800 }
1801 hi = lo
1802
1803 if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
1804 t = t[1:]
1805 if hi, t, err = p.parseClassChar(t, s); err != nil {
1806 return "", err
1807 }
1808 if hi < lo {
1809 rng = rng[:len(rng)-len(t)]
1810 return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
1811 }
1812 }
1813 if p.flags&FoldCase == 0 {
1814 class = appendRange(class, lo, hi)
1815 } else {
1816 class = appendFoldedRange(class, lo, hi)
1817 }
1818 }
1819 t = t[1:]
1820
1821
1822 re.Rune = class
1823 class = cleanClass(&re.Rune)
1824 if sign < 0 {
1825 class = negateClass(class)
1826 }
1827 re.Rune = class
1828 p.push(re)
1829 return t, nil
1830 }
1831
1832
1833
1834 func cleanClass(rp *[]rune) []rune {
1835
1836
1837 sort.Sort(ranges{rp})
1838
1839 r := *rp
1840 if len(r) < 2 {
1841 return r
1842 }
1843
1844
1845 w := 2
1846 for i := 2; i < len(r); i += 2 {
1847 lo, hi := r[i], r[i+1]
1848 if lo <= r[w-1]+1 {
1849
1850 if hi > r[w-1] {
1851 r[w-1] = hi
1852 }
1853 continue
1854 }
1855
1856 r[w] = lo
1857 r[w+1] = hi
1858 w += 2
1859 }
1860
1861 return r[:w]
1862 }
1863
1864
1865
1866 func inCharClass(r rune, class []rune) bool {
1867 _, ok := sort.Find(len(class)/2, func(i int) int {
1868 lo, hi := class[2*i], class[2*i+1]
1869 if r > hi {
1870 return +1
1871 }
1872 if r < lo {
1873 return -1
1874 }
1875 return 0
1876 })
1877 return ok
1878 }
1879
1880
1881 func appendLiteral(r []rune, x rune, flags Flags) []rune {
1882 if flags&FoldCase != 0 {
1883 return appendFoldedRange(r, x, x)
1884 }
1885 return appendRange(r, x, x)
1886 }
1887
1888
1889 func appendRange(r []rune, lo, hi rune) []rune {
1890
1891
1892
1893
1894 n := len(r)
1895 for i := 2; i <= 4; i += 2 {
1896 if n >= i {
1897 rlo, rhi := r[n-i], r[n-i+1]
1898 if lo <= rhi+1 && rlo <= hi+1 {
1899 if lo < rlo {
1900 r[n-i] = lo
1901 }
1902 if hi > rhi {
1903 r[n-i+1] = hi
1904 }
1905 return r
1906 }
1907 }
1908 }
1909
1910 return append(r, lo, hi)
1911 }
1912
1913 const (
1914
1915
1916 minFold = 0x0041
1917 maxFold = 0x1e943
1918 )
1919
1920
1921
1922 func appendFoldedRange(r []rune, lo, hi rune) []rune {
1923
1924 if lo <= minFold && hi >= maxFold {
1925
1926 return appendRange(r, lo, hi)
1927 }
1928 if hi < minFold || lo > maxFold {
1929
1930 return appendRange(r, lo, hi)
1931 }
1932 if lo < minFold {
1933
1934 r = appendRange(r, lo, minFold-1)
1935 lo = minFold
1936 }
1937 if hi > maxFold {
1938
1939 r = appendRange(r, maxFold+1, hi)
1940 hi = maxFold
1941 }
1942
1943
1944 for c := lo; c <= hi; c++ {
1945 r = appendRange(r, c, c)
1946 f := unicode.SimpleFold(c)
1947 for f != c {
1948 r = appendRange(r, f, f)
1949 f = unicode.SimpleFold(f)
1950 }
1951 }
1952 return r
1953 }
1954
1955
1956
1957 func appendClass(r []rune, x []rune) []rune {
1958 for i := 0; i < len(x); i += 2 {
1959 r = appendRange(r, x[i], x[i+1])
1960 }
1961 return r
1962 }
1963
1964
1965 func appendFoldedClass(r []rune, x []rune) []rune {
1966 for i := 0; i < len(x); i += 2 {
1967 r = appendFoldedRange(r, x[i], x[i+1])
1968 }
1969 return r
1970 }
1971
1972
1973
1974 func appendNegatedClass(r []rune, x []rune) []rune {
1975 nextLo := '\u0000'
1976 for i := 0; i < len(x); i += 2 {
1977 lo, hi := x[i], x[i+1]
1978 if nextLo <= lo-1 {
1979 r = appendRange(r, nextLo, lo-1)
1980 }
1981 nextLo = hi + 1
1982 }
1983 if nextLo <= unicode.MaxRune {
1984 r = appendRange(r, nextLo, unicode.MaxRune)
1985 }
1986 return r
1987 }
1988
1989
1990 func appendTable(r []rune, x *unicode.RangeTable) []rune {
1991 for _, xr := range x.R16 {
1992 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1993 if stride == 1 {
1994 r = appendRange(r, lo, hi)
1995 continue
1996 }
1997 for c := lo; c <= hi; c += stride {
1998 r = appendRange(r, c, c)
1999 }
2000 }
2001 for _, xr := range x.R32 {
2002 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2003 if stride == 1 {
2004 r = appendRange(r, lo, hi)
2005 continue
2006 }
2007 for c := lo; c <= hi; c += stride {
2008 r = appendRange(r, c, c)
2009 }
2010 }
2011 return r
2012 }
2013
2014
2015 func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
2016 nextLo := '\u0000'
2017 for _, xr := range x.R16 {
2018 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2019 if stride == 1 {
2020 if nextLo <= lo-1 {
2021 r = appendRange(r, nextLo, lo-1)
2022 }
2023 nextLo = hi + 1
2024 continue
2025 }
2026 for c := lo; c <= hi; c += stride {
2027 if nextLo <= c-1 {
2028 r = appendRange(r, nextLo, c-1)
2029 }
2030 nextLo = c + 1
2031 }
2032 }
2033 for _, xr := range x.R32 {
2034 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2035 if stride == 1 {
2036 if nextLo <= lo-1 {
2037 r = appendRange(r, nextLo, lo-1)
2038 }
2039 nextLo = hi + 1
2040 continue
2041 }
2042 for c := lo; c <= hi; c += stride {
2043 if nextLo <= c-1 {
2044 r = appendRange(r, nextLo, c-1)
2045 }
2046 nextLo = c + 1
2047 }
2048 }
2049 if nextLo <= unicode.MaxRune {
2050 r = appendRange(r, nextLo, unicode.MaxRune)
2051 }
2052 return r
2053 }
2054
2055
2056
2057 func negateClass(r []rune) []rune {
2058 nextLo := '\u0000'
2059 w := 0
2060 for i := 0; i < len(r); i += 2 {
2061 lo, hi := r[i], r[i+1]
2062 if nextLo <= lo-1 {
2063 r[w] = nextLo
2064 r[w+1] = lo - 1
2065 w += 2
2066 }
2067 nextLo = hi + 1
2068 }
2069 r = r[:w]
2070 if nextLo <= unicode.MaxRune {
2071
2072
2073 r = append(r, nextLo, unicode.MaxRune)
2074 }
2075 return r
2076 }
2077
2078
2079
2080
2081
2082 type ranges struct {
2083 p *[]rune
2084 }
2085
2086 func (ra ranges) Less(i, j int) bool {
2087 p := *ra.p
2088 i *= 2
2089 j *= 2
2090 return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
2091 }
2092
2093 func (ra ranges) Len() int {
2094 return len(*ra.p) / 2
2095 }
2096
2097 func (ra ranges) Swap(i, j int) {
2098 p := *ra.p
2099 i *= 2
2100 j *= 2
2101 p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
2102 }
2103
2104 func checkUTF8(s string) error {
2105 for s != "" {
2106 rune, size := utf8.DecodeRuneInString(s)
2107 if rune == utf8.RuneError && size == 1 {
2108 return &Error{Code: ErrInvalidUTF8, Expr: s}
2109 }
2110 s = s[size:]
2111 }
2112 return nil
2113 }
2114
2115 func nextRune(s string) (c rune, t string, err error) {
2116 c, size := utf8.DecodeRuneInString(s)
2117 if c == utf8.RuneError && size == 1 {
2118 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
2119 }
2120 return c, s[size:], nil
2121 }
2122
2123 func isalnum(c rune) bool {
2124 return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
2125 }
2126
2127 func unhex(c rune) rune {
2128 if '0' <= c && c <= '9' {
2129 return c - '0'
2130 }
2131 if 'a' <= c && c <= 'f' {
2132 return c - 'a' + 10
2133 }
2134 if 'A' <= c && c <= 'F' {
2135 return c - 'A' + 10
2136 }
2137 return -1
2138 }
2139
View as plain text