Source file
src/regexp/onepass.go
Documentation: regexp
1
2
3
4
5 package regexp
6
7 import (
8 "regexp/syntax"
9 "sort"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13 )
14
15
16
17
18
19
20
21
22
23 type onePassProg struct {
24 Inst []onePassInst
25 Start int
26 NumCap int
27 }
28
29
30
31 type onePassInst struct {
32 syntax.Inst
33 Next []uint32
34 }
35
36
37
38
39
40
41 func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
42 i := &p.Inst[p.Start]
43 if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
44 return "", i.Op == syntax.InstMatch, uint32(p.Start)
45 }
46 pc = i.Out
47 i = &p.Inst[pc]
48 for i.Op == syntax.InstNop {
49 pc = i.Out
50 i = &p.Inst[pc]
51 }
52
53 if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
54 return "", i.Op == syntax.InstMatch, uint32(p.Start)
55 }
56
57
58 var buf strings.Builder
59 for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError {
60 buf.WriteRune(i.Rune[0])
61 pc, i = i.Out, &p.Inst[i.Out]
62 }
63 if i.Op == syntax.InstEmptyWidth &&
64 syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
65 p.Inst[i.Out].Op == syntax.InstMatch {
66 complete = true
67 }
68 return buf.String(), complete, pc
69 }
70
71
72
73
74
75 func onePassNext(i *onePassInst, r rune) uint32 {
76 next := i.MatchRunePos(r)
77 if next >= 0 {
78 return i.Next[next]
79 }
80 if i.Op == syntax.InstAltMatch {
81 return i.Out
82 }
83 return 0
84 }
85
86 func iop(i *syntax.Inst) syntax.InstOp {
87 op := i.Op
88 switch op {
89 case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
90 op = syntax.InstRune
91 }
92 return op
93 }
94
95
96 type queueOnePass struct {
97 sparse []uint32
98 dense []uint32
99 size, nextIndex uint32
100 }
101
102 func (q *queueOnePass) empty() bool {
103 return q.nextIndex >= q.size
104 }
105
106 func (q *queueOnePass) next() (n uint32) {
107 n = q.dense[q.nextIndex]
108 q.nextIndex++
109 return
110 }
111
112 func (q *queueOnePass) clear() {
113 q.size = 0
114 q.nextIndex = 0
115 }
116
117 func (q *queueOnePass) contains(u uint32) bool {
118 if u >= uint32(len(q.sparse)) {
119 return false
120 }
121 return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
122 }
123
124 func (q *queueOnePass) insert(u uint32) {
125 if !q.contains(u) {
126 q.insertNew(u)
127 }
128 }
129
130 func (q *queueOnePass) insertNew(u uint32) {
131 if u >= uint32(len(q.sparse)) {
132 return
133 }
134 q.sparse[u] = q.size
135 q.dense[q.size] = u
136 q.size++
137 }
138
139 func newQueue(size int) (q *queueOnePass) {
140 return &queueOnePass{
141 sparse: make([]uint32, size),
142 dense: make([]uint32, size),
143 }
144 }
145
146
147
148
149
150
151 const mergeFailed = uint32(0xffffffff)
152
153 var (
154 noRune = []rune{}
155 noNext = []uint32{mergeFailed}
156 )
157
158 func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
159 leftLen := len(*leftRunes)
160 rightLen := len(*rightRunes)
161 if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
162 panic("mergeRuneSets odd length []rune")
163 }
164 var (
165 lx, rx int
166 )
167 merged := make([]rune, 0)
168 next := make([]uint32, 0)
169 ok := true
170 defer func() {
171 if !ok {
172 merged = nil
173 next = nil
174 }
175 }()
176
177 ix := -1
178 extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
179 if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
180 return false
181 }
182 merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
183 *newLow += 2
184 ix += 2
185 next = append(next, pc)
186 return true
187 }
188
189 for lx < leftLen || rx < rightLen {
190 switch {
191 case rx >= rightLen:
192 ok = extend(&lx, leftRunes, leftPC)
193 case lx >= leftLen:
194 ok = extend(&rx, rightRunes, rightPC)
195 case (*rightRunes)[rx] < (*leftRunes)[lx]:
196 ok = extend(&rx, rightRunes, rightPC)
197 default:
198 ok = extend(&lx, leftRunes, leftPC)
199 }
200 if !ok {
201 return noRune, noNext
202 }
203 }
204 return merged, next
205 }
206
207
208 func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
209 for ix, instOriginal := range original.Inst {
210 switch instOriginal.Op {
211 case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
212 case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
213 prog.Inst[ix].Next = nil
214 case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
215 prog.Inst[ix].Next = nil
216 prog.Inst[ix] = onePassInst{Inst: instOriginal}
217 }
218 }
219 }
220
221
222 func onePassCopy(prog *syntax.Prog) *onePassProg {
223 p := &onePassProg{
224 Start: prog.Start,
225 NumCap: prog.NumCap,
226 Inst: make([]onePassInst, len(prog.Inst)),
227 }
228 for i, inst := range prog.Inst {
229 p.Inst[i] = onePassInst{Inst: inst}
230 }
231
232
233
234
235
236
237 for pc := range p.Inst {
238 switch p.Inst[pc].Op {
239 default:
240 continue
241 case syntax.InstAlt, syntax.InstAltMatch:
242
243 p_A_Other := &p.Inst[pc].Out
244 p_A_Alt := &p.Inst[pc].Arg
245
246 instAlt := p.Inst[*p_A_Alt]
247 if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
248 p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
249 instAlt = p.Inst[*p_A_Alt]
250 if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
251 continue
252 }
253 }
254 instOther := p.Inst[*p_A_Other]
255
256 if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
257
258 continue
259 }
260
261
262 p_B_Alt := &p.Inst[*p_A_Alt].Out
263 p_B_Other := &p.Inst[*p_A_Alt].Arg
264 patch := false
265 if instAlt.Out == uint32(pc) {
266 patch = true
267 } else if instAlt.Arg == uint32(pc) {
268 patch = true
269 p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
270 }
271 if patch {
272 *p_B_Alt = *p_A_Other
273 }
274
275
276
277 if *p_A_Other == *p_B_Alt {
278 *p_A_Alt = *p_B_Other
279 }
280 }
281 }
282 return p
283 }
284
285
286 type runeSlice []rune
287
288 func (p runeSlice) Len() int { return len(p) }
289 func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
290 func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
291
292 var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
293 var anyRune = []rune{0, unicode.MaxRune}
294
295
296
297
298
299
300 func makeOnePass(p *onePassProg) *onePassProg {
301
302 if len(p.Inst) >= 1000 {
303 return nil
304 }
305
306 var (
307 instQueue = newQueue(len(p.Inst))
308 visitQueue = newQueue(len(p.Inst))
309 check func(uint32, []bool) bool
310 onePassRunes = make([][]rune, len(p.Inst))
311 )
312
313
314
315 check = func(pc uint32, m []bool) (ok bool) {
316 ok = true
317 inst := &p.Inst[pc]
318 if visitQueue.contains(pc) {
319 return
320 }
321 visitQueue.insert(pc)
322 switch inst.Op {
323 case syntax.InstAlt, syntax.InstAltMatch:
324 ok = check(inst.Out, m) && check(inst.Arg, m)
325
326 matchOut := m[inst.Out]
327 matchArg := m[inst.Arg]
328 if matchOut && matchArg {
329 ok = false
330 break
331 }
332
333 if matchArg {
334 inst.Out, inst.Arg = inst.Arg, inst.Out
335 matchOut, matchArg = matchArg, matchOut
336 }
337 if matchOut {
338 m[pc] = true
339 inst.Op = syntax.InstAltMatch
340 }
341
342
343 onePassRunes[pc], inst.Next = mergeRuneSets(
344 &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
345 if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
346 ok = false
347 break
348 }
349 case syntax.InstCapture, syntax.InstNop:
350 ok = check(inst.Out, m)
351 m[pc] = m[inst.Out]
352
353 onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
354 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
355 for i := range inst.Next {
356 inst.Next[i] = inst.Out
357 }
358 case syntax.InstEmptyWidth:
359 ok = check(inst.Out, m)
360 m[pc] = m[inst.Out]
361 onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
362 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
363 for i := range inst.Next {
364 inst.Next[i] = inst.Out
365 }
366 case syntax.InstMatch, syntax.InstFail:
367 m[pc] = inst.Op == syntax.InstMatch
368 case syntax.InstRune:
369 m[pc] = false
370 if len(inst.Next) > 0 {
371 break
372 }
373 instQueue.insert(inst.Out)
374 if len(inst.Rune) == 0 {
375 onePassRunes[pc] = []rune{}
376 inst.Next = []uint32{inst.Out}
377 break
378 }
379 runes := make([]rune, 0)
380 if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
381 r0 := inst.Rune[0]
382 runes = append(runes, r0, r0)
383 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
384 runes = append(runes, r1, r1)
385 }
386 sort.Sort(runeSlice(runes))
387 } else {
388 runes = append(runes, inst.Rune...)
389 }
390 onePassRunes[pc] = runes
391 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
392 for i := range inst.Next {
393 inst.Next[i] = inst.Out
394 }
395 inst.Op = syntax.InstRune
396 case syntax.InstRune1:
397 m[pc] = false
398 if len(inst.Next) > 0 {
399 break
400 }
401 instQueue.insert(inst.Out)
402 runes := []rune{}
403
404 if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
405 r0 := inst.Rune[0]
406 runes = append(runes, r0, r0)
407 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
408 runes = append(runes, r1, r1)
409 }
410 sort.Sort(runeSlice(runes))
411 } else {
412 runes = append(runes, inst.Rune[0], inst.Rune[0])
413 }
414 onePassRunes[pc] = runes
415 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
416 for i := range inst.Next {
417 inst.Next[i] = inst.Out
418 }
419 inst.Op = syntax.InstRune
420 case syntax.InstRuneAny:
421 m[pc] = false
422 if len(inst.Next) > 0 {
423 break
424 }
425 instQueue.insert(inst.Out)
426 onePassRunes[pc] = append([]rune{}, anyRune...)
427 inst.Next = []uint32{inst.Out}
428 case syntax.InstRuneAnyNotNL:
429 m[pc] = false
430 if len(inst.Next) > 0 {
431 break
432 }
433 instQueue.insert(inst.Out)
434 onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
435 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
436 for i := range inst.Next {
437 inst.Next[i] = inst.Out
438 }
439 }
440 return
441 }
442
443 instQueue.clear()
444 instQueue.insert(uint32(p.Start))
445 m := make([]bool, len(p.Inst))
446 for !instQueue.empty() {
447 visitQueue.clear()
448 pc := instQueue.next()
449 if !check(pc, m) {
450 p = nil
451 break
452 }
453 }
454 if p != nil {
455 for i := range p.Inst {
456 p.Inst[i].Rune = onePassRunes[i]
457 }
458 }
459 return p
460 }
461
462
463
464
465
466 func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
467 if prog.Start == 0 {
468 return nil
469 }
470
471 if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
472 syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
473 return nil
474 }
475
476 for _, inst := range prog.Inst {
477 opOut := prog.Inst[inst.Out].Op
478 switch inst.Op {
479 default:
480 if opOut == syntax.InstMatch {
481 return nil
482 }
483 case syntax.InstAlt, syntax.InstAltMatch:
484 if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
485 return nil
486 }
487 case syntax.InstEmptyWidth:
488 if opOut == syntax.InstMatch {
489 if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
490 continue
491 }
492 return nil
493 }
494 }
495 }
496
497
498 p = onePassCopy(prog)
499
500
501 p = makeOnePass(p)
502
503 if p != nil {
504 cleanupOnePass(p, prog)
505 }
506 return p
507 }
508
View as plain text