Source file
src/regexp/exec_test.go
Documentation: regexp
1
2
3
4
5 package regexp
6
7 import (
8 "bufio"
9 "compress/bzip2"
10 "fmt"
11 "internal/testenv"
12 "io"
13 "os"
14 "path/filepath"
15 "regexp/syntax"
16 "strconv"
17 "strings"
18 "testing"
19 "unicode/utf8"
20 )
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 func TestRE2Search(t *testing.T) {
66 testRE2(t, "testdata/re2-search.txt")
67 }
68
69 func testRE2(t *testing.T, file string) {
70 f, err := os.Open(file)
71 if err != nil {
72 t.Fatal(err)
73 }
74 defer f.Close()
75 var txt io.Reader
76 if strings.HasSuffix(file, ".bz2") {
77 z := bzip2.NewReader(f)
78 txt = z
79 file = file[:len(file)-len(".bz2")]
80 } else {
81 txt = f
82 }
83 lineno := 0
84 scanner := bufio.NewScanner(txt)
85 var (
86 str []string
87 input []string
88 inStrings bool
89 re *Regexp
90 refull *Regexp
91 nfail int
92 ncase int
93 )
94 for lineno := 1; scanner.Scan(); lineno++ {
95 line := scanner.Text()
96 switch {
97 case line == "":
98 t.Fatalf("%s:%d: unexpected blank line", file, lineno)
99 case line[0] == '#':
100 continue
101 case 'A' <= line[0] && line[0] <= 'Z':
102
103 t.Logf("%s\n", line)
104 continue
105 case line == "strings":
106 str = str[:0]
107 inStrings = true
108 case line == "regexps":
109 inStrings = false
110 case line[0] == '"':
111 q, err := strconv.Unquote(line)
112 if err != nil {
113
114 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
115 }
116 if inStrings {
117 str = append(str, q)
118 continue
119 }
120
121 if len(input) != 0 {
122 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
123 }
124 re, err = tryCompile(q)
125 if err != nil {
126 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
127
128 continue
129 }
130 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
131 if nfail++; nfail >= 100 {
132 t.Fatalf("stopping after %d errors", nfail)
133 }
134 continue
135 }
136 full := `\A(?:` + q + `)\z`
137 refull, err = tryCompile(full)
138 if err != nil {
139
140 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
141 }
142 input = str
143 case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
144
145 ncase++
146 if re == nil {
147
148 continue
149 }
150 if len(input) == 0 {
151 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
152 }
153 var text string
154 text, input = input[0], input[1:]
155 if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
156
157
158
159
160
161 continue
162 }
163 res := strings.Split(line, ";")
164 if len(res) != len(run) {
165 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
166 }
167 for i := range res {
168 have, suffix := run[i](re, refull, text)
169 want := parseResult(t, file, lineno, res[i])
170 if !same(have, want) {
171 t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
172 if nfail++; nfail >= 100 {
173 t.Fatalf("stopping after %d errors", nfail)
174 }
175 continue
176 }
177 b, suffix := match[i](re, refull, text)
178 if b != (want != nil) {
179 t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
180 if nfail++; nfail >= 100 {
181 t.Fatalf("stopping after %d errors", nfail)
182 }
183 continue
184 }
185 }
186
187 default:
188 t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
189 }
190 }
191 if err := scanner.Err(); err != nil {
192 t.Fatalf("%s:%d: %v", file, lineno, err)
193 }
194 if len(input) != 0 {
195 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
196 }
197 t.Logf("%d cases tested", ncase)
198 }
199
200 var run = []func(*Regexp, *Regexp, string) ([]int, string){
201 runFull,
202 runPartial,
203 runFullLongest,
204 runPartialLongest,
205 }
206
207 func runFull(re, refull *Regexp, text string) ([]int, string) {
208 refull.longest = false
209 return refull.FindStringSubmatchIndex(text), "[full]"
210 }
211
212 func runPartial(re, refull *Regexp, text string) ([]int, string) {
213 re.longest = false
214 return re.FindStringSubmatchIndex(text), ""
215 }
216
217 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
218 refull.longest = true
219 return refull.FindStringSubmatchIndex(text), "[full,longest]"
220 }
221
222 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
223 re.longest = true
224 return re.FindStringSubmatchIndex(text), "[longest]"
225 }
226
227 var match = []func(*Regexp, *Regexp, string) (bool, string){
228 matchFull,
229 matchPartial,
230 matchFullLongest,
231 matchPartialLongest,
232 }
233
234 func matchFull(re, refull *Regexp, text string) (bool, string) {
235 refull.longest = false
236 return refull.MatchString(text), "[full]"
237 }
238
239 func matchPartial(re, refull *Regexp, text string) (bool, string) {
240 re.longest = false
241 return re.MatchString(text), ""
242 }
243
244 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
245 refull.longest = true
246 return refull.MatchString(text), "[full,longest]"
247 }
248
249 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
250 re.longest = true
251 return re.MatchString(text), "[longest]"
252 }
253
// isSingleBytes reports whether every rune in s is below utf8.RuneSelf,
// i.e. whether s consists solely of single-byte (ASCII) characters.
// Invalid UTF-8 therefore yields false, as does any multi-byte rune.
func isSingleBytes(s string) bool {
	// Any byte at or above RuneSelf is either part of a multi-byte rune or
	// an invalid encoding, so inspecting bytes is enough.
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
262
263 func tryCompile(s string) (re *Regexp, err error) {
264
265 defer func() {
266 if r := recover(); r != nil {
267 err = fmt.Errorf("panic: %v", r)
268 }
269 }()
270 return Compile(s)
271 }
272
// parseResult converts one recorded result into the slice form returned by
// FindSubmatchIndex-style methods.
//
// The string "-" alone means "no match" and yields nil. Otherwise res is a
// list of pairs separated by single spaces: "-" stands for an unmatched
// group (-1, -1) and "lo-hi" for the offsets lo and hi. A malformed pair,
// or one with lo > hi, is reported with t.Fatalf using file and lineno.
func parseResult(t *testing.T, file string, lineno int, res string) []int {
	if res == "-" {
		return nil
	}

	pairs := strings.Split(res, " ")
	out := make([]int, 0, 2*len(pairs))
	for _, pair := range pairs {
		if pair == "-" {
			out = append(out, -1, -1)
			continue
		}
		// A pair without "-" leaves hiStr empty, which Atoi rejects below.
		loStr, hiStr, _ := strings.Cut(pair, "-")
		lo, errLo := strconv.Atoi(loStr)
		hi, errHi := strconv.Atoi(hiStr)
		if errLo != nil || errHi != nil || lo > hi {
			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
		}
		out = append(out, lo, hi)
	}
	return out
}
311
// same reports whether x and y have the same length and equal elements.
// A nil slice and an empty slice compare as the same.
func same(x, y []int) bool {
	n := len(x)
	if n != len(y) {
		return false
	}
	for i := 0; i < n; i++ {
		if x[i] != y[i] {
			return false
		}
	}
	return true
}
323
324
325
326
327 func TestFowler(t *testing.T) {
328 files, err := filepath.Glob("testdata/*.dat")
329 if err != nil {
330 t.Fatal(err)
331 }
332 for _, file := range files {
333 t.Log(file)
334 testFowler(t, file)
335 }
336 }
337
338 var notab = MustCompilePOSIX(`[^\t]+`)
339
340 func testFowler(t *testing.T, file string) {
341 f, err := os.Open(file)
342 if err != nil {
343 t.Error(err)
344 return
345 }
346 defer f.Close()
347 b := bufio.NewReader(f)
348 lineno := 0
349 lastRegexp := ""
350 Reading:
351 for {
352 lineno++
353 line, err := b.ReadString('\n')
354 if err != nil {
355 if err != io.EOF {
356 t.Errorf("%s:%d: %v", file, lineno, err)
357 }
358 break Reading
359 }
360
361
362
363
364
365
366
367
368 if line[0] == '#' || line[0] == '\n' {
369 continue Reading
370 }
371 line = line[:len(line)-1]
372 field := notab.FindAllString(line, -1)
373 for i, f := range field {
374 if f == "NULL" {
375 field[i] = ""
376 }
377 if f == "NIL" {
378 t.Logf("%s:%d: skip: %s", file, lineno, line)
379 continue Reading
380 }
381 }
382 if len(field) == 0 {
383 continue Reading
384 }
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446 flag := field[0]
447 switch flag[0] {
448 case '?', '&', '|', ';', '{', '}':
449
450
451 flag = flag[1:]
452 if flag == "" {
453 continue Reading
454 }
455 case ':':
456 var ok bool
457 if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
458 t.Logf("skip: %s", line)
459 continue Reading
460 }
461 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
462 t.Logf("skip: %s", line)
463 continue Reading
464 }
465
466
467 if len(field) < 4 {
468 t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
469 continue Reading
470 }
471
472
473 if strings.Contains(flag, "$") {
474 f := `"` + field[1] + `"`
475 if field[1], err = strconv.Unquote(f); err != nil {
476 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
477 }
478 f = `"` + field[2] + `"`
479 if field[2], err = strconv.Unquote(f); err != nil {
480 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
481 }
482 }
483
484
485
486
487 if field[1] == "SAME" {
488 field[1] = lastRegexp
489 }
490 lastRegexp = field[1]
491
492
493 text := field[2]
494
495
496 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
497 if !ok {
498 t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
499 continue Reading
500 }
501
502
503
504 Testing:
505
506 for _, c := range flag {
507 pattern := field[1]
508 syn := syntax.POSIX | syntax.ClassNL
509 switch c {
510 default:
511 continue Testing
512 case 'E':
513
514 case 'L':
515
516 pattern = QuoteMeta(pattern)
517 }
518
519 for _, c := range flag {
520 switch c {
521 case 'i':
522 syn |= syntax.FoldCase
523 }
524 }
525
526 re, err := compile(pattern, syn, true)
527 if err != nil {
528 if shouldCompile {
529 t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
530 }
531 continue Testing
532 }
533 if !shouldCompile {
534 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
535 continue Testing
536 }
537 match := re.MatchString(text)
538 if match != shouldMatch {
539 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
540 continue Testing
541 }
542 have := re.FindStringSubmatchIndex(text)
543 if (len(have) > 0) != match {
544 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
545 continue Testing
546 }
547 if len(have) > len(pos) {
548 have = have[:len(pos)]
549 }
550 if !same(have, pos) {
551 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
552 }
553 }
554 }
555 }
556
// parseFowlerResult decodes the expected-result field of a Fowler test line.
//
// The forms accepted are:
//
//   - "": the pattern compiles and matches; no positions are given;
//   - "NOMATCH": the pattern compiles but does not match;
//   - anything starting with an upper-case letter: the pattern must not compile;
//   - "(lo,hi)(lo,hi)...": the pattern matches with these offsets, where
//     "?" stands for -1.
//
// ok reports whether s could be parsed. When a position list is malformed
// the result is ok=false, compiled=true, matched=false and pos=nil.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	if s == "" {
		return true, true, true, nil
	}
	if s == "NOMATCH" {
		return true, true, false, nil
	}
	if 'A' <= s[0] && s[0] <= 'Z' {
		return true, false, false, nil
	}

	// Everything else must be a sequence of "(lo,hi)" groups.
	var offsets []int
	for len(s) > 0 {
		// An even count means a new pair starts here: expect '(' and read
		// up to ','. Otherwise read the second number up to ')'.
		delim := byte(')')
		if len(offsets)%2 == 0 {
			if s[0] != '(' {
				return false, true, false, nil
			}
			s = s[1:]
			delim = ','
		}

		end := 0
		for end < len(s) && s[end] != delim {
			end++
		}
		// Reject an empty number and a missing delimiter alike.
		if end == 0 || end == len(s) {
			return false, true, false, nil
		}

		value := -1
		if num := s[:end]; num != "?" {
			n, err := strconv.Atoi(num)
			if err != nil {
				return false, true, false, nil
			}
			value = n
		}
		offsets = append(offsets, value)
		s = s[end+1:]
	}
	if len(offsets)%2 != 0 {
		return false, true, false, nil
	}
	return true, true, true, offsets
}
633
// text caches the largest buffer generated by makeText so far.
var text []byte

// makeText returns n bytes of deterministic pseudo-random text made of
// printable ASCII characters (0x20 through 0x7E) and newlines.
//
// If the cached buffer already holds at least n bytes, a prefix of it is
// returned; otherwise a new buffer is generated from the same fixed seed and
// cached. Because the seed never changes, a shorter result is always a
// prefix of a longer one.
func makeText(n int) []byte {
	if n <= len(text) {
		return text[:n]
	}
	buf := make([]byte, n)
	text = buf
	state := ^uint32(0)
	for i := 0; i < n; i++ {
		// Advance the generator: shift left, flip the low bit, and apply
		// the feedback constant whenever the top bit is set.
		state <<= 1
		state ^= 1
		if state&(1<<31) != 0 {
			state ^= 0x88888eef
		}
		switch {
		case state%31 == 0:
			buf[i] = '\n'
		default:
			buf[i] = byte(state%(0x7E+1-0x20) + 0x20)
		}
	}
	return buf
}
656
657 func BenchmarkMatch(b *testing.B) {
658 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
659
660 for _, data := range benchData {
661 r := MustCompile(data.re)
662 for _, size := range benchSizes {
663 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
664 continue
665 }
666 t := makeText(size.n)
667 b.Run(data.name+"/"+size.name, func(b *testing.B) {
668 b.SetBytes(int64(size.n))
669 for i := 0; i < b.N; i++ {
670 if r.Match(t) {
671 b.Fatal("match!")
672 }
673 }
674 })
675 }
676 }
677 }
678
679 func BenchmarkMatch_onepass_regex(b *testing.B) {
680 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
681 r := MustCompile(`(?s)\A.*\z`)
682 if r.onepass == nil {
683 b.Fatalf("want onepass regex, but %q is not onepass", r)
684 }
685 for _, size := range benchSizes {
686 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
687 continue
688 }
689 t := makeText(size.n)
690 b.Run(size.name, func(b *testing.B) {
691 b.SetBytes(int64(size.n))
692 b.ReportAllocs()
693 for i := 0; i < b.N; i++ {
694 if !r.Match(t) {
695 b.Fatal("not match!")
696 }
697 }
698 })
699 }
700 }
701
// benchData lists the named patterns used by BenchmarkMatch.
var benchData = []struct{ name, re string }{
	{name: "Easy0", re: "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Easy0i", re: "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
	{name: "Easy1", re: "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
	{name: "Medium", re: "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Hard", re: "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Hard1", re: "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}
710
// benchSizes lists the input sizes, in bytes, used by the Match benchmarks,
// together with the name each size gets in sub-benchmark names.
var benchSizes = []struct {
	name string
	n    int
}{
	{name: "16", n: 16},
	{name: "32", n: 32},
	{name: "1K", n: 1 << 10},
	{name: "32K", n: 32 << 10},
	{name: "1M", n: 1 << 20},
	{name: "32M", n: 32 << 20},
}
722
723 func TestLongest(t *testing.T) {
724 re, err := Compile(`a(|b)`)
725 if err != nil {
726 t.Fatal(err)
727 }
728 if g, w := re.FindString("ab"), "a"; g != w {
729 t.Errorf("first match was %q, want %q", g, w)
730 }
731 re.Longest()
732 if g, w := re.FindString("ab"), "ab"; g != w {
733 t.Errorf("longest match was %q, want %q", g, w)
734 }
735 }
736
737
738
739 func TestProgramTooLongForBacktrack(t *testing.T) {
740 longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
741 if !longRegex.MatchString("two") {
742 t.Errorf("longRegex.MatchString(\"two\") was false, want true")
743 }
744 if longRegex.MatchString("xxx") {
745 t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
746 }
747 }
748
View as plain text