Source file
src/regexp/backtrack.go
Documentation: regexp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package regexp
16
17 import (
18 "regexp/syntax"
19 "sync"
20 )
21
22
23
24 type job struct {
25 pc uint32
26 arg bool
27 pos int
28 }
29
30 const (
31 visitedBits = 32
32 maxBacktrackProg = 500
33 maxBacktrackVector = 256 * 1024
34 )
35
36
37 type bitState struct {
38 end int
39 cap []int
40 matchcap []int
41 jobs []job
42 visited []uint32
43
44 inputs inputs
45 }
46
47 var bitStatePool sync.Pool
48
49 func newBitState() *bitState {
50 b, ok := bitStatePool.Get().(*bitState)
51 if !ok {
52 b = new(bitState)
53 }
54 return b
55 }
56
57 func freeBitState(b *bitState) {
58 b.inputs.clear()
59 bitStatePool.Put(b)
60 }
61
62
63
64 func maxBitStateLen(prog *syntax.Prog) int {
65 if !shouldBacktrack(prog) {
66 return 0
67 }
68 return maxBacktrackVector / len(prog.Inst)
69 }
70
71
72
73 func shouldBacktrack(prog *syntax.Prog) bool {
74 return len(prog.Inst) <= maxBacktrackProg
75 }
76
77
78
79
80 func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) {
81 b.end = end
82
83 if cap(b.jobs) == 0 {
84 b.jobs = make([]job, 0, 256)
85 } else {
86 b.jobs = b.jobs[:0]
87 }
88
89 visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
90 if cap(b.visited) < visitedSize {
91 b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits)
92 } else {
93 b.visited = b.visited[:visitedSize]
94 clear(b.visited)
95 }
96
97 if cap(b.cap) < ncap {
98 b.cap = make([]int, ncap)
99 } else {
100 b.cap = b.cap[:ncap]
101 }
102 for i := range b.cap {
103 b.cap[i] = -1
104 }
105
106 if cap(b.matchcap) < ncap {
107 b.matchcap = make([]int, ncap)
108 } else {
109 b.matchcap = b.matchcap[:ncap]
110 }
111 for i := range b.matchcap {
112 b.matchcap[i] = -1
113 }
114 }
115
116
117
118 func (b *bitState) shouldVisit(pc uint32, pos int) bool {
119 n := uint(int(pc)*(b.end+1) + pos)
120 if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 {
121 return false
122 }
123 b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1))
124 return true
125 }
126
127
128
129 func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) {
130
131
132 if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
133 b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos})
134 }
135 }
136
137
138 func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
139 longest := re.longest
140
141 b.push(re, pc, pos, false)
142 for len(b.jobs) > 0 {
143 l := len(b.jobs) - 1
144
145 pc := b.jobs[l].pc
146 pos := b.jobs[l].pos
147 arg := b.jobs[l].arg
148 b.jobs = b.jobs[:l]
149
150
151
152
153
154
155
156
157 goto Skip
158 CheckAndLoop:
159 if !b.shouldVisit(pc, pos) {
160 continue
161 }
162 Skip:
163
164 inst := &re.prog.Inst[pc]
165
166 switch inst.Op {
167 default:
168 panic("bad inst")
169 case syntax.InstFail:
170 panic("unexpected InstFail")
171 case syntax.InstAlt:
172
173
174
175
176
177
178
179
180 if arg {
181
182 arg = false
183 pc = inst.Arg
184 goto CheckAndLoop
185 } else {
186 b.push(re, pc, pos, true)
187 pc = inst.Out
188 goto CheckAndLoop
189 }
190
191 case syntax.InstAltMatch:
192
193 switch re.prog.Inst[inst.Out].Op {
194 case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
195
196 b.push(re, inst.Arg, pos, false)
197 pc = inst.Arg
198 pos = b.end
199 goto CheckAndLoop
200 }
201
202 b.push(re, inst.Out, b.end, false)
203 pc = inst.Out
204 goto CheckAndLoop
205
206 case syntax.InstRune:
207 r, width := i.step(pos)
208 if !inst.MatchRune(r) {
209 continue
210 }
211 pos += width
212 pc = inst.Out
213 goto CheckAndLoop
214
215 case syntax.InstRune1:
216 r, width := i.step(pos)
217 if r != inst.Rune[0] {
218 continue
219 }
220 pos += width
221 pc = inst.Out
222 goto CheckAndLoop
223
224 case syntax.InstRuneAnyNotNL:
225 r, width := i.step(pos)
226 if r == '\n' || r == endOfText {
227 continue
228 }
229 pos += width
230 pc = inst.Out
231 goto CheckAndLoop
232
233 case syntax.InstRuneAny:
234 r, width := i.step(pos)
235 if r == endOfText {
236 continue
237 }
238 pos += width
239 pc = inst.Out
240 goto CheckAndLoop
241
242 case syntax.InstCapture:
243 if arg {
244
245 b.cap[inst.Arg] = pos
246 continue
247 } else {
248 if inst.Arg < uint32(len(b.cap)) {
249
250 b.push(re, pc, b.cap[inst.Arg], true)
251 b.cap[inst.Arg] = pos
252 }
253 pc = inst.Out
254 goto CheckAndLoop
255 }
256
257 case syntax.InstEmptyWidth:
258 flag := i.context(pos)
259 if !flag.match(syntax.EmptyOp(inst.Arg)) {
260 continue
261 }
262 pc = inst.Out
263 goto CheckAndLoop
264
265 case syntax.InstNop:
266 pc = inst.Out
267 goto CheckAndLoop
268
269 case syntax.InstMatch:
270
271
272 if len(b.cap) == 0 {
273 return true
274 }
275
276
277
278
279 if len(b.cap) > 1 {
280 b.cap[1] = pos
281 }
282 if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) {
283 copy(b.matchcap, b.cap)
284 }
285
286
287 if !longest {
288 return true
289 }
290
291
292 if pos == b.end {
293 return true
294 }
295
296
297 continue
298 }
299 }
300
301 return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0
302 }
303
304
305 func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int {
306 startCond := re.cond
307 if startCond == ^syntax.EmptyOp(0) {
308 return nil
309 }
310 if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
311
312 return nil
313 }
314
315 b := newBitState()
316 i, end := b.inputs.init(nil, ib, is)
317 b.reset(re.prog, end, ncap)
318
319
320 if startCond&syntax.EmptyBeginText != 0 {
321 if len(b.cap) > 0 {
322 b.cap[0] = pos
323 }
324 if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
325 freeBitState(b)
326 return nil
327 }
328 } else {
329
330
331
332
333
334
335
336 width := -1
337 for ; pos <= end && width != 0; pos += width {
338 if len(re.prefix) > 0 {
339
340 advance := i.index(re, pos)
341 if advance < 0 {
342 freeBitState(b)
343 return nil
344 }
345 pos += advance
346 }
347
348 if len(b.cap) > 0 {
349 b.cap[0] = pos
350 }
351 if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
352
353 goto Match
354 }
355 _, width = i.step(pos)
356 }
357 freeBitState(b)
358 return nil
359 }
360
361 Match:
362 dstCap = append(dstCap, b.matchcap...)
363 freeBitState(b)
364 return dstCap
365 }
366
View as plain text