Source file
src/regexp/exec.go
Documentation: regexp
1
2
3
4
5 package regexp
6
7 import (
8 "io"
9 "regexp/syntax"
10 "sync"
11 )
12
13
14
15 type queue struct {
16 sparse []uint32
17 dense []entry
18 }
19
20
21
22
23
24 type entry struct {
25 pc uint32
26 t *thread
27 }
28
29
30
31
32 type thread struct {
33 inst *syntax.Inst
34 cap []int
35 }
36
37
38 type machine struct {
39 re *Regexp
40 p *syntax.Prog
41 q0, q1 queue
42 pool []*thread
43 matched bool
44 matchcap []int
45
46 inputs inputs
47 }
48
49 type inputs struct {
50
51 bytes inputBytes
52 string inputString
53 reader inputReader
54 }
55
56 func (i *inputs) newBytes(b []byte) input {
57 i.bytes.str = b
58 return &i.bytes
59 }
60
61 func (i *inputs) newString(s string) input {
62 i.string.str = s
63 return &i.string
64 }
65
66 func (i *inputs) newReader(r io.RuneReader) input {
67 i.reader.r = r
68 i.reader.atEOT = false
69 i.reader.pos = 0
70 return &i.reader
71 }
72
73 func (i *inputs) clear() {
74
75
76 if i.bytes.str != nil {
77 i.bytes.str = nil
78 } else if i.reader.r != nil {
79 i.reader.r = nil
80 } else {
81 i.string.str = ""
82 }
83 }
84
85 func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
86 if r != nil {
87 return i.newReader(r), 0
88 }
89 if b != nil {
90 return i.newBytes(b), len(b)
91 }
92 return i.newString(s), len(s)
93 }
94
95 func (m *machine) init(ncap int) {
96 for _, t := range m.pool {
97 t.cap = t.cap[:ncap]
98 }
99 m.matchcap = m.matchcap[:ncap]
100 }
101
102
103
104 func (m *machine) alloc(i *syntax.Inst) *thread {
105 var t *thread
106 if n := len(m.pool); n > 0 {
107 t = m.pool[n-1]
108 m.pool = m.pool[:n-1]
109 } else {
110 t = new(thread)
111 t.cap = make([]int, len(m.matchcap), cap(m.matchcap))
112 }
113 t.inst = i
114 return t
115 }
116
117
118
119
120
121
122 type lazyFlag uint64
123
124 func newLazyFlag(r1, r2 rune) lazyFlag {
125 return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2)))
126 }
127
128 func (f lazyFlag) match(op syntax.EmptyOp) bool {
129 if op == 0 {
130 return true
131 }
132 r1 := rune(f >> 32)
133 if op&syntax.EmptyBeginLine != 0 {
134 if r1 != '\n' && r1 >= 0 {
135 return false
136 }
137 op &^= syntax.EmptyBeginLine
138 }
139 if op&syntax.EmptyBeginText != 0 {
140 if r1 >= 0 {
141 return false
142 }
143 op &^= syntax.EmptyBeginText
144 }
145 if op == 0 {
146 return true
147 }
148 r2 := rune(f)
149 if op&syntax.EmptyEndLine != 0 {
150 if r2 != '\n' && r2 >= 0 {
151 return false
152 }
153 op &^= syntax.EmptyEndLine
154 }
155 if op&syntax.EmptyEndText != 0 {
156 if r2 >= 0 {
157 return false
158 }
159 op &^= syntax.EmptyEndText
160 }
161 if op == 0 {
162 return true
163 }
164 if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) {
165 op &^= syntax.EmptyWordBoundary
166 } else {
167 op &^= syntax.EmptyNoWordBoundary
168 }
169 return op == 0
170 }
171
172
173
174
175 func (m *machine) match(i input, pos int) bool {
176 startCond := m.re.cond
177 if startCond == ^syntax.EmptyOp(0) {
178 return false
179 }
180 m.matched = false
181 for i := range m.matchcap {
182 m.matchcap[i] = -1
183 }
184 runq, nextq := &m.q0, &m.q1
185 r, r1 := endOfText, endOfText
186 width, width1 := 0, 0
187 r, width = i.step(pos)
188 if r != endOfText {
189 r1, width1 = i.step(pos + width)
190 }
191 var flag lazyFlag
192 if pos == 0 {
193 flag = newLazyFlag(-1, r)
194 } else {
195 flag = i.context(pos)
196 }
197 for {
198 if len(runq.dense) == 0 {
199 if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
200
201 break
202 }
203 if m.matched {
204
205 break
206 }
207 if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() {
208
209 advance := i.index(m.re, pos)
210 if advance < 0 {
211 break
212 }
213 pos += advance
214 r, width = i.step(pos)
215 r1, width1 = i.step(pos + width)
216 }
217 }
218 if !m.matched {
219 if len(m.matchcap) > 0 {
220 m.matchcap[0] = pos
221 }
222 m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil)
223 }
224 flag = newLazyFlag(r, r1)
225 m.step(runq, nextq, pos, pos+width, r, &flag)
226 if width == 0 {
227 break
228 }
229 if len(m.matchcap) == 0 && m.matched {
230
231
232 break
233 }
234 pos += width
235 r, width = r1, width1
236 if r != endOfText {
237 r1, width1 = i.step(pos + width)
238 }
239 runq, nextq = nextq, runq
240 }
241 m.clear(nextq)
242 return m.matched
243 }
244
245
246 func (m *machine) clear(q *queue) {
247 for _, d := range q.dense {
248 if d.t != nil {
249 m.pool = append(m.pool, d.t)
250 }
251 }
252 q.dense = q.dense[:0]
253 }
254
255
256
257
258
259
260 func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) {
261 longest := m.re.longest
262 for j := 0; j < len(runq.dense); j++ {
263 d := &runq.dense[j]
264 t := d.t
265 if t == nil {
266 continue
267 }
268 if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
269 m.pool = append(m.pool, t)
270 continue
271 }
272 i := t.inst
273 add := false
274 switch i.Op {
275 default:
276 panic("bad inst")
277
278 case syntax.InstMatch:
279 if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
280 t.cap[1] = pos
281 copy(m.matchcap, t.cap)
282 }
283 if !longest {
284
285 for _, d := range runq.dense[j+1:] {
286 if d.t != nil {
287 m.pool = append(m.pool, d.t)
288 }
289 }
290 runq.dense = runq.dense[:0]
291 }
292 m.matched = true
293
294 case syntax.InstRune:
295 add = i.MatchRune(c)
296 case syntax.InstRune1:
297 add = c == i.Rune[0]
298 case syntax.InstRuneAny:
299 add = true
300 case syntax.InstRuneAnyNotNL:
301 add = c != '\n'
302 }
303 if add {
304 t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
305 }
306 if t != nil {
307 m.pool = append(m.pool, t)
308 }
309 }
310 runq.dense = runq.dense[:0]
311 }
312
313
314
315
316
317 func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread {
318 Again:
319 if pc == 0 {
320 return t
321 }
322 if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc {
323 return t
324 }
325
326 j := len(q.dense)
327 q.dense = q.dense[:j+1]
328 d := &q.dense[j]
329 d.t = nil
330 d.pc = pc
331 q.sparse[pc] = uint32(j)
332
333 i := &m.p.Inst[pc]
334 switch i.Op {
335 default:
336 panic("unhandled")
337 case syntax.InstFail:
338
339 case syntax.InstAlt, syntax.InstAltMatch:
340 t = m.add(q, i.Out, pos, cap, cond, t)
341 pc = i.Arg
342 goto Again
343 case syntax.InstEmptyWidth:
344 if cond.match(syntax.EmptyOp(i.Arg)) {
345 pc = i.Out
346 goto Again
347 }
348 case syntax.InstNop:
349 pc = i.Out
350 goto Again
351 case syntax.InstCapture:
352 if int(i.Arg) < len(cap) {
353 opos := cap[i.Arg]
354 cap[i.Arg] = pos
355 m.add(q, i.Out, pos, cap, cond, nil)
356 cap[i.Arg] = opos
357 } else {
358 pc = i.Out
359 goto Again
360 }
361 case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
362 if t == nil {
363 t = m.alloc(i)
364 } else {
365 t.inst = i
366 }
367 if len(cap) > 0 && &t.cap[0] != &cap[0] {
368 copy(t.cap, cap)
369 }
370 d.t = t
371 t = nil
372 }
373 return t
374 }
375
376 type onePassMachine struct {
377 inputs inputs
378 matchcap []int
379 }
380
381 var onePassPool sync.Pool
382
383 func newOnePassMachine() *onePassMachine {
384 m, ok := onePassPool.Get().(*onePassMachine)
385 if !ok {
386 m = new(onePassMachine)
387 }
388 return m
389 }
390
391 func freeOnePassMachine(m *onePassMachine) {
392 m.inputs.clear()
393 onePassPool.Put(m)
394 }
395
396
397 func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int {
398 startCond := re.cond
399 if startCond == ^syntax.EmptyOp(0) {
400 return nil
401 }
402
403 m := newOnePassMachine()
404 if cap(m.matchcap) < ncap {
405 m.matchcap = make([]int, ncap)
406 } else {
407 m.matchcap = m.matchcap[:ncap]
408 }
409
410 matched := false
411 for i := range m.matchcap {
412 m.matchcap[i] = -1
413 }
414
415 i, _ := m.inputs.init(ir, ib, is)
416
417 r, r1 := endOfText, endOfText
418 width, width1 := 0, 0
419 r, width = i.step(pos)
420 if r != endOfText {
421 r1, width1 = i.step(pos + width)
422 }
423 var flag lazyFlag
424 if pos == 0 {
425 flag = newLazyFlag(-1, r)
426 } else {
427 flag = i.context(pos)
428 }
429 pc := re.onepass.Start
430 inst := &re.onepass.Inst[pc]
431
432 if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) &&
433 len(re.prefix) > 0 && i.canCheckPrefix() {
434
435 if !i.hasPrefix(re) {
436 goto Return
437 }
438 pos += len(re.prefix)
439 r, width = i.step(pos)
440 r1, width1 = i.step(pos + width)
441 flag = i.context(pos)
442 pc = int(re.prefixEnd)
443 }
444 for {
445 inst = &re.onepass.Inst[pc]
446 pc = int(inst.Out)
447 switch inst.Op {
448 default:
449 panic("bad inst")
450 case syntax.InstMatch:
451 matched = true
452 if len(m.matchcap) > 0 {
453 m.matchcap[0] = 0
454 m.matchcap[1] = pos
455 }
456 goto Return
457 case syntax.InstRune:
458 if !inst.MatchRune(r) {
459 goto Return
460 }
461 case syntax.InstRune1:
462 if r != inst.Rune[0] {
463 goto Return
464 }
465 case syntax.InstRuneAny:
466
467 case syntax.InstRuneAnyNotNL:
468 if r == '\n' {
469 goto Return
470 }
471
472 case syntax.InstAlt, syntax.InstAltMatch:
473 pc = int(onePassNext(inst, r))
474 continue
475 case syntax.InstFail:
476 goto Return
477 case syntax.InstNop:
478 continue
479 case syntax.InstEmptyWidth:
480 if !flag.match(syntax.EmptyOp(inst.Arg)) {
481 goto Return
482 }
483 continue
484 case syntax.InstCapture:
485 if int(inst.Arg) < len(m.matchcap) {
486 m.matchcap[inst.Arg] = pos
487 }
488 continue
489 }
490 if width == 0 {
491 break
492 }
493 flag = newLazyFlag(r, r1)
494 pos += width
495 r, width = r1, width1
496 if r != endOfText {
497 r1, width1 = i.step(pos + width)
498 }
499 }
500
501 Return:
502 if !matched {
503 freeOnePassMachine(m)
504 return nil
505 }
506
507 dstCap = append(dstCap, m.matchcap...)
508 freeOnePassMachine(m)
509 return dstCap
510 }
511
512
513 func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
514 return re.doExecute(r, b, s, 0, 0, nil) != nil
515 }
516
517
518
519
520
521 func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
522 if dstCap == nil {
523
524 dstCap = arrayNoInts[:0:0]
525 }
526
527 if r == nil && len(b)+len(s) < re.minInputLen {
528 return nil
529 }
530
531 if re.onepass != nil {
532 return re.doOnePass(r, b, s, pos, ncap, dstCap)
533 }
534 if r == nil && len(b)+len(s) < re.maxBitStateLen {
535 return re.backtrack(b, s, pos, ncap, dstCap)
536 }
537
538 m := re.get()
539 i, _ := m.inputs.init(r, b, s)
540
541 m.init(ncap)
542 if !m.match(i, pos) {
543 re.put(m)
544 return nil
545 }
546
547 dstCap = append(dstCap, m.matchcap...)
548 re.put(m)
549 return dstCap
550 }
551
552
553
554 var arrayNoInts [0]int
555
View as plain text