1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69 package regexp
70
71 import (
72 "bytes"
73 "io"
74 "os"
75 "strings"
76 "utf8"
77 )
78
79 var debug = false
80
81
82 type Error string
83
84 func (e Error) String() string {
85 return string(e)
86 }
87
88
89 var (
90 ErrInternal = Error("regexp: internal error")
91 ErrUnmatchedLpar = Error("regexp: unmatched '('")
92 ErrUnmatchedRpar = Error("regexp: unmatched ')'")
93 ErrUnmatchedLbkt = Error("regexp: unmatched '['")
94 ErrUnmatchedRbkt = Error("regexp: unmatched ']'")
95 ErrBadRange = Error("regexp: bad range in character class")
96 ErrExtraneousBackslash = Error("regexp: extraneous backslash")
97 ErrBadClosure = Error("regexp: repeated closure (**, ++, etc.)")
98 ErrBareClosure = Error("regexp: closure applies to nothing")
99 ErrBadBackslash = Error("regexp: illegal backslash escape")
100 )
101
102 const (
103 iStart = iota
104 iEnd
105 iBOT
106 iEOT
107 iChar
108 iCharClass
109 iAny
110 iNotNL
111 iBra
112 iAlt
113 iNop
114 )
115
116
117 type instr struct {
118 kind int
119 index int
120 next *instr
121
122 char int
123 braNum int
124 cclass *charClass
125 left *instr
126 }
127
128 func (i *instr) print() {
129 switch i.kind {
130 case iStart:
131 print("start")
132 case iEnd:
133 print("end")
134 case iBOT:
135 print("bot")
136 case iEOT:
137 print("eot")
138 case iChar:
139 print("char ", string(i.char))
140 case iCharClass:
141 i.cclass.print()
142 case iAny:
143 print("any")
144 case iNotNL:
145 print("notnl")
146 case iBra:
147 if i.braNum&1 == 0 {
148 print("bra", i.braNum/2)
149 } else {
150 print("ebra", i.braNum/2)
151 }
152 case iAlt:
153 print("alt(", i.left.index, ")")
154 case iNop:
155 print("nop")
156 }
157 }
158
159
160
161
162 type Regexp struct {
163 expr string
164 prefix string
165 prefixBytes []byte
166 inst []*instr
167 start *instr
168 prefixStart *instr
169 nbra int
170 }
171
172 type charClass struct {
173 negate bool
174
175 ranges []int
176 cmin, cmax int
177 }
178
179 func (cclass *charClass) print() {
180 print("charclass")
181 if cclass.negate {
182 print(" (negated)")
183 }
184 for i := 0; i < len(cclass.ranges); i += 2 {
185 l := cclass.ranges[i]
186 r := cclass.ranges[i+1]
187 if l == r {
188 print(" [", string(l), "]")
189 } else {
190 print(" [", string(l), "-", string(r), "]")
191 }
192 }
193 }
194
195 func (cclass *charClass) addRange(a, b int) {
196
197 cclass.ranges = append(cclass.ranges, a, b)
198 if a < cclass.cmin {
199 cclass.cmin = a
200 }
201 if b > cclass.cmax {
202 cclass.cmax = b
203 }
204 }
205
206 func (cclass *charClass) matches(c int) bool {
207 if c < cclass.cmin || c > cclass.cmax {
208 return cclass.negate
209 }
210 ranges := cclass.ranges
211 for i := 0; i < len(ranges); i = i + 2 {
212 if ranges[i] <= c && c <= ranges[i+1] {
213 return !cclass.negate
214 }
215 }
216 return cclass.negate
217 }
218
219 func newCharClass() *instr {
220 i := &instr{kind: iCharClass}
221 i.cclass = new(charClass)
222 i.cclass.ranges = make([]int, 0, 4)
223 i.cclass.cmin = 0x10FFFF + 1
224 i.cclass.cmax = -1
225 return i
226 }
227
228 func (re *Regexp) add(i *instr) *instr {
229 i.index = len(re.inst)
230 re.inst = append(re.inst, i)
231 return i
232 }
233
234 type parser struct {
235 re *Regexp
236 nlpar int
237 pos int
238 ch int
239 }
240
241 func (p *parser) error(err Error) {
242 panic(err)
243 }
244
245 const endOfText = -1
246
247 func (p *parser) c() int { return p.ch }
248
249 func (p *parser) nextc() int {
250 if p.pos >= len(p.re.expr) {
251 p.ch = endOfText
252 } else {
253 c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:])
254 p.ch = c
255 p.pos += w
256 }
257 return p.ch
258 }
259
260 func newParser(re *Regexp) *parser {
261 p := new(parser)
262 p.re = re
263 p.nextc()
264 return p
265 }
266
267 func special(c int) bool {
268 for _, r := range `\.+*?()|[]^$` {
269 if c == r {
270 return true
271 }
272 }
273 return false
274 }
275
276 func ispunct(c int) bool {
277 for _, r := range "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" {
278 if c == r {
279 return true
280 }
281 }
282 return false
283 }
284
285 var escapes = []byte("abfnrtv")
286 var escaped = []byte("\a\b\f\n\r\t\v")
287
288 func escape(c int) int {
289 for i, b := range escapes {
290 if int(b) == c {
291 return i
292 }
293 }
294 return -1
295 }
296
297 func (p *parser) checkBackslash() int {
298 c := p.c()
299 if c == '\\' {
300 c = p.nextc()
301 switch {
302 case c == endOfText:
303 p.error(ErrExtraneousBackslash)
304 case ispunct(c):
305
306 case escape(c) >= 0:
307 c = int(escaped[escape(c)])
308 default:
309 p.error(ErrBadBackslash)
310 }
311 }
312 return c
313 }
314
315 func (p *parser) charClass() *instr {
316 i := newCharClass()
317 cc := i.cclass
318 if p.c() == '^' {
319 cc.negate = true
320 p.nextc()
321 }
322 left := -1
323 for {
324 switch c := p.c(); c {
325 case ']', endOfText:
326 if left >= 0 {
327 p.error(ErrBadRange)
328 }
329
330 if cc.negate && len(cc.ranges) == 2 &&
331 cc.ranges[0] == '\n' && cc.ranges[1] == '\n' {
332 nl := &instr{kind: iNotNL}
333 p.re.add(nl)
334 return nl
335 }
336
337 if !cc.negate && len(cc.ranges) == 2 && cc.ranges[0] == cc.ranges[1] {
338 c := &instr{kind: iChar, char: cc.ranges[0]}
339 p.re.add(c)
340 return c
341 }
342 p.re.add(i)
343 return i
344 case '-':
345 p.error(ErrBadRange)
346 default:
347 c = p.checkBackslash()
348 p.nextc()
349 switch {
350 case left < 0:
351 if p.c() == '-' {
352 p.nextc()
353 left = c
354 } else {
355 cc.addRange(c, c)
356 }
357 case left <= c:
358 cc.addRange(left, c)
359 left = -1
360 default:
361 p.error(ErrBadRange)
362 }
363 }
364 }
365 panic("unreachable")
366 }
367
368 func (p *parser) term() (start, end *instr) {
369 switch c := p.c(); c {
370 case '|', endOfText:
371 return nil, nil
372 case '*', '+', '?':
373 p.error(ErrBareClosure)
374 case ')':
375 if p.nlpar == 0 {
376 p.error(ErrUnmatchedRpar)
377 }
378 return nil, nil
379 case ']':
380 p.error(ErrUnmatchedRbkt)
381 case '^':
382 p.nextc()
383 start = p.re.add(&instr{kind: iBOT})
384 return start, start
385 case '$':
386 p.nextc()
387 start = p.re.add(&instr{kind: iEOT})
388 return start, start
389 case '.':
390 p.nextc()
391 start = p.re.add(&instr{kind: iAny})
392 return start, start
393 case '[':
394 p.nextc()
395 start = p.charClass()
396 if p.c() != ']' {
397 p.error(ErrUnmatchedLbkt)
398 }
399 p.nextc()
400 return start, start
401 case '(':
402 p.nextc()
403 p.nlpar++
404 p.re.nbra++
405 nbra := p.re.nbra
406 start, end = p.regexp()
407 if p.c() != ')' {
408 p.error(ErrUnmatchedLpar)
409 }
410 p.nlpar--
411 p.nextc()
412 bra := &instr{kind: iBra, braNum: 2 * nbra}
413 p.re.add(bra)
414 ebra := &instr{kind: iBra, braNum: 2*nbra + 1}
415 p.re.add(ebra)
416 if start == nil {
417 if end == nil {
418 p.error(ErrInternal)
419 return
420 }
421 start = ebra
422 } else {
423 end.next = ebra
424 }
425 bra.next = start
426 return bra, ebra
427 default:
428 c = p.checkBackslash()
429 p.nextc()
430 start = &instr{kind: iChar, char: c}
431 p.re.add(start)
432 return start, start
433 }
434 panic("unreachable")
435 }
436
437 func (p *parser) closure() (start, end *instr) {
438 start, end = p.term()
439 if start == nil {
440 return
441 }
442 switch p.c() {
443 case '*':
444
445 alt := &instr{kind: iAlt}
446 p.re.add(alt)
447 end.next = alt
448 alt.left = start
449 start = alt
450 end = alt
451 case '+':
452
453 alt := &instr{kind: iAlt}
454 p.re.add(alt)
455 end.next = alt
456 alt.left = start
457 end = alt
458 case '?':
459
460 alt := &instr{kind: iAlt}
461 p.re.add(alt)
462 nop := &instr{kind: iNop}
463 p.re.add(nop)
464 alt.left = start
465 alt.next = nop
466 end.next = nop
467 start = alt
468 end = nop
469 default:
470 return
471 }
472 switch p.nextc() {
473 case '*', '+', '?':
474 p.error(ErrBadClosure)
475 }
476 return
477 }
478
479 func (p *parser) concatenation() (start, end *instr) {
480 for {
481 nstart, nend := p.closure()
482 switch {
483 case nstart == nil:
484 if start == nil {
485 nop := p.re.add(&instr{kind: iNop})
486 return nop, nop
487 }
488 return
489 case start == nil:
490 start, end = nstart, nend
491 default:
492 end.next = nstart
493 end = nend
494 }
495 }
496 panic("unreachable")
497 }
498
499 func (p *parser) regexp() (start, end *instr) {
500 start, end = p.concatenation()
501 for {
502 switch p.c() {
503 default:
504 return
505 case '|':
506 p.nextc()
507 nstart, nend := p.concatenation()
508 alt := &instr{kind: iAlt}
509 p.re.add(alt)
510 alt.left = start
511 alt.next = nstart
512 nop := &instr{kind: iNop}
513 p.re.add(nop)
514 end.next = nop
515 nend.next = nop
516 start, end = alt, nop
517 }
518 }
519 panic("unreachable")
520 }
521
522 func unNop(i *instr) *instr {
523 for i.kind == iNop {
524 i = i.next
525 }
526 return i
527 }
528
529 func (re *Regexp) eliminateNops() {
530 for _, inst := range re.inst {
531 if inst.kind == iEnd {
532 continue
533 }
534 inst.next = unNop(inst.next)
535 if inst.kind == iAlt {
536 inst.left = unNop(inst.left)
537 }
538 }
539 }
540
541 func (re *Regexp) dump() {
542 print("prefix <", re.prefix, ">\n")
543 for _, inst := range re.inst {
544 print(inst.index, ": ")
545 inst.print()
546 if inst.kind != iEnd {
547 print(" -> ", inst.next.index)
548 }
549 print("\n")
550 }
551 }
552
553 func (re *Regexp) doParse() {
554 p := newParser(re)
555 start := &instr{kind: iStart}
556 re.add(start)
557 s, e := p.regexp()
558 start.next = s
559 re.start = start
560 e.next = re.add(&instr{kind: iEnd})
561
562 if debug {
563 re.dump()
564 println()
565 }
566
567 re.eliminateNops()
568 if debug {
569 re.dump()
570 println()
571 }
572 re.setPrefix()
573 if debug {
574 re.dump()
575 println()
576 }
577 }
578
579
580
581
582 func (re *Regexp) setPrefix() {
583 var b []byte
584 var utf = make([]byte, utf8.UTFMax)
585 var inst *instr
586
587 inst = re.inst[0].next
588 for inst.kind == iBOT {
589 inst = inst.next
590 }
591 Loop:
592 for ; inst.kind != iEnd; inst = inst.next {
593
594 if inst.kind != iChar {
595 break
596 }
597
598
599 switch inst.next.kind {
600 case iBOT, iEOT, iAlt:
601 break Loop
602 }
603 n := utf8.EncodeRune(utf, inst.char)
604 b = append(b, utf[0:n]...)
605 }
606
607 re.prefixStart = inst
608 re.prefixBytes = b
609 re.prefix = string(b)
610 }
611
612
613 func (re *Regexp) String() string {
614 return re.expr
615 }
616
617
618
619 func Compile(str string) (regexp *Regexp, error os.Error) {
620 regexp = new(Regexp)
621
622 defer func() {
623 if e := recover(); e != nil {
624 regexp = nil
625 error = e.(Error)
626 }
627 }()
628 regexp.expr = str
629 regexp.inst = make([]*instr, 0, 10)
630 regexp.doParse()
631 return
632 }
633
634
635
636
637 func MustCompile(str string) *Regexp {
638 regexp, error := Compile(str)
639 if error != nil {
640 panic(`regexp: compiling "` + str + `": ` + error.String())
641 }
642 return regexp
643 }
644
645
646 func (re *Regexp) NumSubexp() int { return re.nbra }
647
648
649
650
651 type matchArena struct {
652 head *matchVec
653 len int
654 pos int
655 atBOT bool
656 atEOT bool
657 }
658
659 type matchVec struct {
660 m []int
661 ref int
662 next *matchVec
663 }
664
665 func (a *matchArena) new() *matchVec {
666 if a.head == nil {
667 const N = 10
668 block := make([]matchVec, N)
669 for i := 0; i < N; i++ {
670 b := &block[i]
671 b.next = a.head
672 a.head = b
673 }
674 }
675 m := a.head
676 a.head = m.next
677 m.ref = 0
678 if m.m == nil {
679 m.m = make([]int, a.len)
680 }
681 return m
682 }
683
684 func (a *matchArena) free(m *matchVec) {
685 m.ref--
686 if m.ref == 0 {
687 m.next = a.head
688 a.head = m
689 }
690 }
691
692 func (a *matchArena) copy(m *matchVec) *matchVec {
693 m1 := a.new()
694 copy(m1.m, m.m)
695 return m1
696 }
697
698 func (a *matchArena) noMatch() *matchVec {
699 m := a.new()
700 for i := range m.m {
701 m.m[i] = -1
702 }
703 m.ref = 1
704 return m
705 }
706
707 type state struct {
708 inst *instr
709 prefixed bool
710 match *matchVec
711 }
712
713
714
715
716 func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matchVec) []state {
717 switch inst.kind {
718 case iBOT:
719 if a.atBOT {
720 s = a.addState(s, inst.next, prefixed, match)
721 }
722 return s
723 case iEOT:
724 if a.atEOT {
725 s = a.addState(s, inst.next, prefixed, match)
726 }
727 return s
728 case iBra:
729 match.m[inst.braNum] = a.pos
730 s = a.addState(s, inst.next, prefixed, match)
731 return s
732 }
733 l := len(s)
734
735
736 for i := 0; i < l; i++ {
737 if s[i].inst == inst {
738 return s
739 }
740 }
741 s = append(s, state{inst, prefixed, match})
742 match.ref++
743 if inst.kind == iAlt {
744 s = a.addState(s, inst.left, prefixed, a.copy(match))
745
746 s = a.addState(s, inst.next, prefixed, a.copy(match))
747 }
748 return s
749 }
750
751
752
753 type input interface {
754 step(pos int) (rune int, width int)
755 canCheckPrefix() bool
756 hasPrefix(re *Regexp) bool
757 index(re *Regexp, pos int) int
758 }
759
760
761 type inputString struct {
762 str string
763 }
764
765 func newInputString(str string) *inputString {
766 return &inputString{str: str}
767 }
768
769 func (i *inputString) step(pos int) (int, int) {
770 if pos < len(i.str) {
771 return utf8.DecodeRuneInString(i.str[pos:len(i.str)])
772 }
773 return endOfText, 0
774 }
775
776 func (i *inputString) canCheckPrefix() bool {
777 return true
778 }
779
780 func (i *inputString) hasPrefix(re *Regexp) bool {
781 return strings.HasPrefix(i.str, re.prefix)
782 }
783
784 func (i *inputString) index(re *Regexp, pos int) int {
785 return strings.Index(i.str[pos:], re.prefix)
786 }
787
788
789 type inputBytes struct {
790 str []byte
791 }
792
793 func newInputBytes(str []byte) *inputBytes {
794 return &inputBytes{str: str}
795 }
796
797 func (i *inputBytes) step(pos int) (int, int) {
798 if pos < len(i.str) {
799 return utf8.DecodeRune(i.str[pos:len(i.str)])
800 }
801 return endOfText, 0
802 }
803
804 func (i *inputBytes) canCheckPrefix() bool {
805 return true
806 }
807
808 func (i *inputBytes) hasPrefix(re *Regexp) bool {
809 return bytes.HasPrefix(i.str, re.prefixBytes)
810 }
811
812 func (i *inputBytes) index(re *Regexp, pos int) int {
813 return bytes.Index(i.str[pos:], re.prefixBytes)
814 }
815
816
817 type inputReader struct {
818 r io.RuneReader
819 atEOT bool
820 pos int
821 }
822
823 func newInputReader(r io.RuneReader) *inputReader {
824 return &inputReader{r: r}
825 }
826
827 func (i *inputReader) step(pos int) (int, int) {
828 if !i.atEOT && pos != i.pos {
829 return endOfText, 0
830
831 }
832 r, w, err := i.r.ReadRune()
833 if err != nil {
834 i.atEOT = true
835 return endOfText, 0
836 }
837 i.pos += w
838 return r, w
839 }
840
841 func (i *inputReader) canCheckPrefix() bool {
842 return false
843 }
844
845 func (i *inputReader) hasPrefix(re *Regexp) bool {
846 return false
847 }
848
849 func (i *inputReader) index(re *Regexp, pos int) int {
850 return -1
851 }
852
853
854 func (re *Regexp) doExecute(i input, pos int) []int {
855 var s [2][]state
856 s[0] = make([]state, 0, 10)
857 s[1] = make([]state, 0, 10)
858 in, out := 0, 1
859 var final state
860 found := false
861 anchored := re.inst[0].next.kind == iBOT
862 if anchored && pos > 0 {
863 return nil
864 }
865
866 if i.canCheckPrefix() && re.prefix != "" {
867 advance := 0
868 if anchored {
869 if !i.hasPrefix(re) {
870 return nil
871 }
872 } else {
873 advance = i.index(re, pos)
874 if advance == -1 {
875 return nil
876 }
877 }
878 pos += advance
879 }
880
881
882 nextChar, nextWidth := i.step(pos)
883 arena := &matchArena{
884 len: 2 * (re.nbra + 1),
885 pos: pos,
886 atBOT: pos == 0,
887 atEOT: nextChar == endOfText,
888 }
889 for c, startPos := 0, pos; c != endOfText; {
890 if !found && (pos == startPos || !anchored) {
891
892 match := arena.noMatch()
893 match.m[0] = pos
894 s[out] = arena.addState(s[out], re.start.next, false, match)
895 arena.free(match)
896 } else if len(s[out]) == 0 {
897
898 break
899 }
900 in, out = out, in
901
902 old := s[out]
903 for _, state := range old {
904 arena.free(state.match)
905 }
906 s[out] = old[0:0]
907 c = nextChar
908 thisPos := pos
909 pos += nextWidth
910 nextChar, nextWidth = i.step(pos)
911 arena.atEOT = nextChar == endOfText
912 arena.atBOT = false
913 arena.pos = pos
914 for _, st := range s[in] {
915 switch st.inst.kind {
916 case iBOT:
917 case iEOT:
918 case iChar:
919 if c == st.inst.char {
920 s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
921 }
922 case iCharClass:
923 if st.inst.cclass.matches(c) {
924 s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
925 }
926 case iAny:
927 if c != endOfText {
928 s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
929 }
930 case iNotNL:
931 if c != endOfText && c != '\n' {
932 s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
933 }
934 case iBra:
935 case iAlt:
936 case iEnd:
937
938 if !found ||
939 st.match.m[0] < final.match.m[0] ||
940 (st.match.m[0] == final.match.m[0] && thisPos > final.match.m[1]) {
941 if final.match != nil {
942 arena.free(final.match)
943 }
944 final = st
945 final.match.ref++
946 final.match.m[1] = thisPos
947 }
948 found = true
949 default:
950 st.inst.print()
951 panic("unknown instruction in execute")
952 }
953 }
954 }
955 if final.match == nil {
956 return nil
957 }
958
959 if final.prefixed && len(final.match.m) > 0 {
960 final.match.m[0] -= len(re.prefix)
961 }
962 return final.match.m
963 }
964
965
966
967
968 func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
969 c := make([]int, len(re.inst)-2)
970
971 i := 0
972 for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
973
974 if inst.kind != iChar {
975 return string(c[:i]), false
976 }
977 c[i] = inst.char
978 i++
979 }
980 return string(c[:i]), true
981 }
982
983
984
985
986 func (re *Regexp) MatchReader(r io.RuneReader) bool {
987 return len(re.doExecute(newInputReader(r), 0)) > 0
988 }
989
990
991
992 func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(newInputString(s), 0)) > 0 }
993
994
995
996 func (re *Regexp) Match(b []byte) bool { return len(re.doExecute(newInputBytes(b), 0)) > 0 }
997
998
999
1000
1001 func MatchReader(pattern string, r io.RuneReader) (matched bool, error os.Error) {
1002 re, err := Compile(pattern)
1003 if err != nil {
1004 return false, err
1005 }
1006 return re.MatchReader(r), nil
1007 }
1008
1009
1010
1011
1012 func MatchString(pattern string, s string) (matched bool, error os.Error) {
1013 re, err := Compile(pattern)
1014 if err != nil {
1015 return false, err
1016 }
1017 return re.MatchString(s), nil
1018 }
1019
1020
1021
1022
1023 func Match(pattern string, b []byte) (matched bool, error os.Error) {
1024 re, err := Compile(pattern)
1025 if err != nil {
1026 return false, err
1027 }
1028 return re.Match(b), nil
1029 }
1030
1031
1032
1033
1034 func (re *Regexp) ReplaceAllString(src, repl string) string {
1035 return re.ReplaceAllStringFunc(src, func(string) string { return repl })
1036 }
1037
1038
1039
1040
1041
1042 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
1043 lastMatchEnd := 0
1044 searchPos := 0
1045 buf := new(bytes.Buffer)
1046 for searchPos <= len(src) {
1047 a := re.doExecute(newInputString(src), searchPos)
1048 if len(a) == 0 {
1049 break
1050 }
1051
1052
1053 io.WriteString(buf, src[lastMatchEnd:a[0]])
1054
1055
1056
1057
1058
1059 if a[1] > lastMatchEnd || a[0] == 0 {
1060 io.WriteString(buf, repl(src[a[0]:a[1]]))
1061 }
1062 lastMatchEnd = a[1]
1063
1064
1065 _, width := utf8.DecodeRuneInString(src[searchPos:])
1066 if searchPos+width > a[1] {
1067 searchPos += width
1068 } else if searchPos+1 > a[1] {
1069
1070
1071 searchPos++
1072 } else {
1073 searchPos = a[1]
1074 }
1075 }
1076
1077
1078 io.WriteString(buf, src[lastMatchEnd:])
1079
1080 return buf.String()
1081 }
1082
1083
1084
1085
1086 func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
1087 return re.ReplaceAllFunc(src, func([]byte) []byte { return repl })
1088 }
1089
1090
1091
1092
1093
1094 func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
1095 lastMatchEnd := 0
1096 searchPos := 0
1097 buf := new(bytes.Buffer)
1098 for searchPos <= len(src) {
1099 a := re.doExecute(newInputBytes(src), searchPos)
1100 if len(a) == 0 {
1101 break
1102 }
1103
1104
1105 buf.Write(src[lastMatchEnd:a[0]])
1106
1107
1108
1109
1110
1111 if a[1] > lastMatchEnd || a[0] == 0 {
1112 buf.Write(repl(src[a[0]:a[1]]))
1113 }
1114 lastMatchEnd = a[1]
1115
1116
1117 _, width := utf8.DecodeRune(src[searchPos:])
1118 if searchPos+width > a[1] {
1119 searchPos += width
1120 } else if searchPos+1 > a[1] {
1121
1122
1123 searchPos++
1124 } else {
1125 searchPos = a[1]
1126 }
1127 }
1128
1129
1130 buf.Write(src[lastMatchEnd:])
1131
1132 return buf.Bytes()
1133 }
1134
1135
1136
1137
1138 func QuoteMeta(s string) string {
1139 b := make([]byte, 2*len(s))
1140
1141
1142 j := 0
1143 for i := 0; i < len(s); i++ {
1144 if special(int(s[i])) {
1145 b[j] = '\\'
1146 j++
1147 }
1148 b[j] = s[i]
1149 j++
1150 }
1151 return string(b[0:j])
1152 }
1153
1154
1155 func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
1156 var end int
1157 if b == nil {
1158 end = len(s)
1159 } else {
1160 end = len(b)
1161 }
1162
1163 for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
1164 var in input
1165 if b == nil {
1166 in = newInputString(s)
1167 } else {
1168 in = newInputBytes(b)
1169 }
1170 matches := re.doExecute(in, pos)
1171 if len(matches) == 0 {
1172 break
1173 }
1174
1175 accept := true
1176 if matches[1] == pos {
1177
1178 if matches[0] == prevMatchEnd {
1179
1180
1181 accept = false
1182 }
1183 var width int
1184
1185 if b == nil {
1186 _, width = utf8.DecodeRuneInString(s[pos:end])
1187 } else {
1188 _, width = utf8.DecodeRune(b[pos:end])
1189 }
1190 if width > 0 {
1191 pos += width
1192 } else {
1193 pos = end + 1
1194 }
1195 } else {
1196 pos = matches[1]
1197 }
1198 prevMatchEnd = matches[1]
1199
1200 if accept {
1201 deliver(matches)
1202 i++
1203 }
1204 }
1205 }
1206
1207
1208
1209 func (re *Regexp) Find(b []byte) []byte {
1210 a := re.doExecute(newInputBytes(b), 0)
1211 if a == nil {
1212 return nil
1213 }
1214 return b[a[0]:a[1]]
1215 }
1216
1217
1218
1219
1220
1221 func (re *Regexp) FindIndex(b []byte) (loc []int) {
1222 a := re.doExecute(newInputBytes(b), 0)
1223 if a == nil {
1224 return nil
1225 }
1226 return a[0:2]
1227 }
1228
1229
1230
1231
1232
1233
1234 func (re *Regexp) FindString(s string) string {
1235 a := re.doExecute(newInputString(s), 0)
1236 if a == nil {
1237 return ""
1238 }
1239 return s[a[0]:a[1]]
1240 }
1241
1242
1243
1244
1245
1246 func (re *Regexp) FindStringIndex(s string) []int {
1247 a := re.doExecute(newInputString(s), 0)
1248 if a == nil {
1249 return nil
1250 }
1251 return a[0:2]
1252 }
1253
1254
1255
1256
1257
1258 func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
1259 a := re.doExecute(newInputReader(r), 0)
1260 if a == nil {
1261 return nil
1262 }
1263 return a[0:2]
1264 }
1265
1266
1267
1268
1269
1270
1271 func (re *Regexp) FindSubmatch(b []byte) [][]byte {
1272 a := re.doExecute(newInputBytes(b), 0)
1273 if a == nil {
1274 return nil
1275 }
1276 ret := make([][]byte, len(a)/2)
1277 for i := range ret {
1278 if a[2*i] >= 0 {
1279 ret[i] = b[a[2*i]:a[2*i+1]]
1280 }
1281 }
1282 return ret
1283 }
1284
1285
1286
1287
1288
1289
1290 func (re *Regexp) FindSubmatchIndex(b []byte) []int {
1291 return re.doExecute(newInputBytes(b), 0)
1292 }
1293
1294
1295
1296
1297
1298
1299 func (re *Regexp) FindStringSubmatch(s string) []string {
1300 a := re.doExecute(newInputString(s), 0)
1301 if a == nil {
1302 return nil
1303 }
1304 ret := make([]string, len(a)/2)
1305 for i := range ret {
1306 if a[2*i] >= 0 {
1307 ret[i] = s[a[2*i]:a[2*i+1]]
1308 }
1309 }
1310 return ret
1311 }
1312
1313
1314
1315
1316
1317
1318 func (re *Regexp) FindStringSubmatchIndex(s string) []int {
1319 return re.doExecute(newInputString(s), 0)
1320 }
1321
1322
1323
1324
1325
1326
1327 func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
1328 return re.doExecute(newInputReader(r), 0)
1329 }
1330
1331 const startSize = 10
1332
1333
1334
1335
1336
1337 func (re *Regexp) FindAll(b []byte, n int) [][]byte {
1338 if n < 0 {
1339 n = len(b) + 1
1340 }
1341 result := make([][]byte, 0, startSize)
1342 re.allMatches("", b, n, func(match []int) {
1343 result = append(result, b[match[0]:match[1]])
1344 })
1345 if len(result) == 0 {
1346 return nil
1347 }
1348 return result
1349 }
1350
1351
1352
1353
1354
1355 func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
1356 if n < 0 {
1357 n = len(b) + 1
1358 }
1359 result := make([][]int, 0, startSize)
1360 re.allMatches("", b, n, func(match []int) {
1361 result = append(result, match[0:2])
1362 })
1363 if len(result) == 0 {
1364 return nil
1365 }
1366 return result
1367 }
1368
1369
1370
1371
1372
1373 func (re *Regexp) FindAllString(s string, n int) []string {
1374 if n < 0 {
1375 n = len(s) + 1
1376 }
1377 result := make([]string, 0, startSize)
1378 re.allMatches(s, nil, n, func(match []int) {
1379 result = append(result, s[match[0]:match[1]])
1380 })
1381 if len(result) == 0 {
1382 return nil
1383 }
1384 return result
1385 }
1386
1387
1388
1389
1390
1391 func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
1392 if n < 0 {
1393 n = len(s) + 1
1394 }
1395 result := make([][]int, 0, startSize)
1396 re.allMatches(s, nil, n, func(match []int) {
1397 result = append(result, match[0:2])
1398 })
1399 if len(result) == 0 {
1400 return nil
1401 }
1402 return result
1403 }
1404
1405
1406
1407
1408
1409 func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
1410 if n < 0 {
1411 n = len(b) + 1
1412 }
1413 result := make([][][]byte, 0, startSize)
1414 re.allMatches("", b, n, func(match []int) {
1415 slice := make([][]byte, len(match)/2)
1416 for j := range slice {
1417 if match[2*j] >= 0 {
1418 slice[j] = b[match[2*j]:match[2*j+1]]
1419 }
1420 }
1421 result = append(result, slice)
1422 })
1423 if len(result) == 0 {
1424 return nil
1425 }
1426 return result
1427 }
1428
1429
1430
1431
1432
1433 func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
1434 if n < 0 {
1435 n = len(b) + 1
1436 }
1437 result := make([][]int, 0, startSize)
1438 re.allMatches("", b, n, func(match []int) {
1439 result = append(result, match)
1440 })
1441 if len(result) == 0 {
1442 return nil
1443 }
1444 return result
1445 }
1446
1447
1448
1449
1450
1451 func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
1452 if n < 0 {
1453 n = len(s) + 1
1454 }
1455 result := make([][]string, 0, startSize)
1456 re.allMatches(s, nil, n, func(match []int) {
1457 slice := make([]string, len(match)/2)
1458 for j := range slice {
1459 if match[2*j] >= 0 {
1460 slice[j] = s[match[2*j]:match[2*j+1]]
1461 }
1462 }
1463 result = append(result, slice)
1464 })
1465 if len(result) == 0 {
1466 return nil
1467 }
1468 return result
1469 }
1470
1471
1472
1473
1474
1475
1476 func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
1477 if n < 0 {
1478 n = len(s) + 1
1479 }
1480 result := make([][]int, 0, startSize)
1481 re.allMatches(s, nil, n, func(match []int) {
1482 result = append(result, match)
1483 })
1484 if len(result) == 0 {
1485 return nil
1486 }
1487 return result
1488 }