1
2
3
4
5 package syntax
6
7 import "unicode"
8
9
10
11
12
13
14
15
16
17
18
19 type patchList struct {
20 head, tail uint32
21 }
22
23 func makePatchList(n uint32) patchList {
24 return patchList{n, n}
25 }
26
27 func (l patchList) patch(p *Prog, val uint32) {
28 head := l.head
29 for head != 0 {
30 i := &p.Inst[head>>1]
31 if head&1 == 0 {
32 head = i.Out
33 i.Out = val
34 } else {
35 head = i.Arg
36 i.Arg = val
37 }
38 }
39 }
40
41 func (l1 patchList) append(p *Prog, l2 patchList) patchList {
42 if l1.head == 0 {
43 return l2
44 }
45 if l2.head == 0 {
46 return l1
47 }
48
49 i := &p.Inst[l1.tail>>1]
50 if l1.tail&1 == 0 {
51 i.Out = l2.head
52 } else {
53 i.Arg = l2.head
54 }
55 return patchList{l1.head, l2.tail}
56 }
57
58
59 type frag struct {
60 i uint32
61 out patchList
62 }
63
64 type compiler struct {
65 p *Prog
66 }
67
68
69
70 func Compile(re *Regexp) (*Prog, error) {
71 var c compiler
72 c.init()
73 f := c.compile(re)
74 f.out.patch(c.p, c.inst(InstMatch).i)
75 c.p.Start = int(f.i)
76 return c.p, nil
77 }
78
79 func (c *compiler) init() {
80 c.p = new(Prog)
81 c.p.NumCap = 2
82 c.inst(InstFail)
83 }
84
85 var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
86 var anyRune = []rune{0, unicode.MaxRune}
87
88 func (c *compiler) compile(re *Regexp) frag {
89 switch re.Op {
90 case OpNoMatch:
91 return c.fail()
92 case OpEmptyMatch:
93 return c.nop()
94 case OpLiteral:
95 if len(re.Rune) == 0 {
96 return c.nop()
97 }
98 var f frag
99 for j := range re.Rune {
100 f1 := c.rune(re.Rune[j:j+1], re.Flags)
101 if j == 0 {
102 f = f1
103 } else {
104 f = c.cat(f, f1)
105 }
106 }
107 return f
108 case OpCharClass:
109 return c.rune(re.Rune, re.Flags)
110 case OpAnyCharNotNL:
111 return c.rune(anyRuneNotNL, 0)
112 case OpAnyChar:
113 return c.rune(anyRune, 0)
114 case OpBeginLine:
115 return c.empty(EmptyBeginLine)
116 case OpEndLine:
117 return c.empty(EmptyEndLine)
118 case OpBeginText:
119 return c.empty(EmptyBeginText)
120 case OpEndText:
121 return c.empty(EmptyEndText)
122 case OpWordBoundary:
123 return c.empty(EmptyWordBoundary)
124 case OpNoWordBoundary:
125 return c.empty(EmptyNoWordBoundary)
126 case OpCapture:
127 bra := c.cap(uint32(re.Cap << 1))
128 sub := c.compile(re.Sub[0])
129 ket := c.cap(uint32(re.Cap<<1 | 1))
130 return c.cat(c.cat(bra, sub), ket)
131 case OpStar:
132 return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
133 case OpPlus:
134 return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
135 case OpQuest:
136 return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
137 case OpConcat:
138 if len(re.Sub) == 0 {
139 return c.nop()
140 }
141 var f frag
142 for i, sub := range re.Sub {
143 if i == 0 {
144 f = c.compile(sub)
145 } else {
146 f = c.cat(f, c.compile(sub))
147 }
148 }
149 return f
150 case OpAlternate:
151 var f frag
152 for _, sub := range re.Sub {
153 f = c.alt(f, c.compile(sub))
154 }
155 return f
156 }
157 panic("regexp: unhandled case in compile")
158 }
159
160 func (c *compiler) inst(op InstOp) frag {
161
162 f := frag{i: uint32(len(c.p.Inst))}
163 c.p.Inst = append(c.p.Inst, Inst{Op: op})
164 return f
165 }
166
167 func (c *compiler) nop() frag {
168 f := c.inst(InstNop)
169 f.out = makePatchList(f.i << 1)
170 return f
171 }
172
173 func (c *compiler) fail() frag {
174 return frag{}
175 }
176
177 func (c *compiler) cap(arg uint32) frag {
178 f := c.inst(InstCapture)
179 f.out = makePatchList(f.i << 1)
180 c.p.Inst[f.i].Arg = arg
181
182 if c.p.NumCap < int(arg)+1 {
183 c.p.NumCap = int(arg) + 1
184 }
185 return f
186 }
187
188 func (c *compiler) cat(f1, f2 frag) frag {
189
190 if f1.i == 0 || f2.i == 0 {
191 return frag{}
192 }
193
194
195
196 f1.out.patch(c.p, f2.i)
197 return frag{f1.i, f2.out}
198 }
199
200 func (c *compiler) alt(f1, f2 frag) frag {
201
202 if f1.i == 0 {
203 return f2
204 }
205 if f2.i == 0 {
206 return f1
207 }
208
209 f := c.inst(InstAlt)
210 i := &c.p.Inst[f.i]
211 i.Out = f1.i
212 i.Arg = f2.i
213 f.out = f1.out.append(c.p, f2.out)
214 return f
215 }
216
217 func (c *compiler) quest(f1 frag, nongreedy bool) frag {
218 f := c.inst(InstAlt)
219 i := &c.p.Inst[f.i]
220 if nongreedy {
221 i.Arg = f1.i
222 f.out = makePatchList(f.i << 1)
223 } else {
224 i.Out = f1.i
225 f.out = makePatchList(f.i<<1 | 1)
226 }
227 f.out = f.out.append(c.p, f1.out)
228 return f
229 }
230
231 func (c *compiler) star(f1 frag, nongreedy bool) frag {
232 f := c.inst(InstAlt)
233 i := &c.p.Inst[f.i]
234 if nongreedy {
235 i.Arg = f1.i
236 f.out = makePatchList(f.i << 1)
237 } else {
238 i.Out = f1.i
239 f.out = makePatchList(f.i<<1 | 1)
240 }
241 f1.out.patch(c.p, f.i)
242 return f
243 }
244
245 func (c *compiler) plus(f1 frag, nongreedy bool) frag {
246 return frag{f1.i, c.star(f1, nongreedy).out}
247 }
248
249 func (c *compiler) empty(op EmptyOp) frag {
250 f := c.inst(InstEmptyWidth)
251 c.p.Inst[f.i].Arg = uint32(op)
252 f.out = makePatchList(f.i << 1)
253 return f
254 }
255
256 func (c *compiler) rune(r []rune, flags Flags) frag {
257 f := c.inst(InstRune)
258 i := &c.p.Inst[f.i]
259 i.Rune = r
260 flags &= FoldCase
261 if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] {
262
263 flags &^= FoldCase
264 }
265 i.Arg = uint32(flags)
266 f.out = makePatchList(f.i << 1)
267
268
269 switch {
270 case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]):
271 i.Op = InstRune1
272 case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune:
273 i.Op = InstRuneAny
274 case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune:
275 i.Op = InstRuneAnyNotNL
276 }
277
278 return f
279 }
280
View as plain text