Source file src/pkg/regexp/regexp.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 package regexp
58
59 import (
60 "bytes"
61 "io"
62 "regexp/syntax"
63 "strconv"
64 "strings"
65 "sync"
66 "unicode"
67 "unicode/utf8"
68 )
69
70 var debug = false
71
72
73
74
75 type Regexp struct {
76
77 expr string
78 prog *syntax.Prog
79 prefix string
80 prefixBytes []byte
81 prefixComplete bool
82 prefixRune rune
83 cond syntax.EmptyOp
84 numSubexp int
85 subexpNames []string
86 longest bool
87
88
89 mu sync.Mutex
90 machine []*machine
91 }
92
93
94 func (re *Regexp) String() string {
95 return re.expr
96 }
97
98
99
100
101
102
103
104
105
106
107
108 func Compile(expr string) (*Regexp, error) {
109 return compile(expr, syntax.Perl, false)
110 }
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131 func CompilePOSIX(expr string) (*Regexp, error) {
132 return compile(expr, syntax.POSIX, true)
133 }
134
135
136
137
138
139 func (re *Regexp) Longest() {
140 re.longest = true
141 }
142
143 func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
144 re, err := syntax.Parse(expr, mode)
145 if err != nil {
146 return nil, err
147 }
148 maxCap := re.MaxCap()
149 capNames := re.CapNames()
150
151 re = re.Simplify()
152 prog, err := syntax.Compile(re)
153 if err != nil {
154 return nil, err
155 }
156 regexp := &Regexp{
157 expr: expr,
158 prog: prog,
159 numSubexp: maxCap,
160 subexpNames: capNames,
161 cond: prog.StartCond(),
162 longest: longest,
163 }
164 regexp.prefix, regexp.prefixComplete = prog.Prefix()
165 if regexp.prefix != "" {
166
167
168 regexp.prefixBytes = []byte(regexp.prefix)
169 regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
170 }
171 return regexp, nil
172 }
173
174
175
176
177 func (re *Regexp) get() *machine {
178 re.mu.Lock()
179 if n := len(re.machine); n > 0 {
180 z := re.machine[n-1]
181 re.machine = re.machine[:n-1]
182 re.mu.Unlock()
183 return z
184 }
185 re.mu.Unlock()
186 z := progMachine(re.prog)
187 z.re = re
188 return z
189 }
190
191
192
193
194
195 func (re *Regexp) put(z *machine) {
196 re.mu.Lock()
197 re.machine = append(re.machine, z)
198 re.mu.Unlock()
199 }
200
201
202
203
204 func MustCompile(str string) *Regexp {
205 regexp, error := Compile(str)
206 if error != nil {
207 panic(`regexp: Compile(` + quote(str) + `): ` + error.Error())
208 }
209 return regexp
210 }
211
212
213
214
215 func MustCompilePOSIX(str string) *Regexp {
216 regexp, error := CompilePOSIX(str)
217 if error != nil {
218 panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.Error())
219 }
220 return regexp
221 }
222
223 func quote(s string) string {
224 if strconv.CanBackquote(s) {
225 return "`" + s + "`"
226 }
227 return strconv.Quote(s)
228 }
229
230
231 func (re *Regexp) NumSubexp() int {
232 return re.numSubexp
233 }
234
235
236
237
238
239
240 func (re *Regexp) SubexpNames() []string {
241 return re.subexpNames
242 }
243
244 const endOfText rune = -1
245
246
247
248 type input interface {
249 step(pos int) (r rune, width int)
250 canCheckPrefix() bool
251 hasPrefix(re *Regexp) bool
252 index(re *Regexp, pos int) int
253 context(pos int) syntax.EmptyOp
254 }
255
256
257 type inputString struct {
258 str string
259 }
260
261 func (i *inputString) step(pos int) (rune, int) {
262 if pos < len(i.str) {
263 c := i.str[pos]
264 if c < utf8.RuneSelf {
265 return rune(c), 1
266 }
267 return utf8.DecodeRuneInString(i.str[pos:])
268 }
269 return endOfText, 0
270 }
271
272 func (i *inputString) canCheckPrefix() bool {
273 return true
274 }
275
276 func (i *inputString) hasPrefix(re *Regexp) bool {
277 return strings.HasPrefix(i.str, re.prefix)
278 }
279
280 func (i *inputString) index(re *Regexp, pos int) int {
281 return strings.Index(i.str[pos:], re.prefix)
282 }
283
284 func (i *inputString) context(pos int) syntax.EmptyOp {
285 r1, r2 := endOfText, endOfText
286 if pos > 0 && pos <= len(i.str) {
287 r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
288 }
289 if pos < len(i.str) {
290 r2, _ = utf8.DecodeRuneInString(i.str[pos:])
291 }
292 return syntax.EmptyOpContext(r1, r2)
293 }
294
295
296 type inputBytes struct {
297 str []byte
298 }
299
300 func (i *inputBytes) step(pos int) (rune, int) {
301 if pos < len(i.str) {
302 c := i.str[pos]
303 if c < utf8.RuneSelf {
304 return rune(c), 1
305 }
306 return utf8.DecodeRune(i.str[pos:])
307 }
308 return endOfText, 0
309 }
310
311 func (i *inputBytes) canCheckPrefix() bool {
312 return true
313 }
314
315 func (i *inputBytes) hasPrefix(re *Regexp) bool {
316 return bytes.HasPrefix(i.str, re.prefixBytes)
317 }
318
319 func (i *inputBytes) index(re *Regexp, pos int) int {
320 return bytes.Index(i.str[pos:], re.prefixBytes)
321 }
322
323 func (i *inputBytes) context(pos int) syntax.EmptyOp {
324 r1, r2 := endOfText, endOfText
325 if pos > 0 && pos <= len(i.str) {
326 r1, _ = utf8.DecodeLastRune(i.str[:pos])
327 }
328 if pos < len(i.str) {
329 r2, _ = utf8.DecodeRune(i.str[pos:])
330 }
331 return syntax.EmptyOpContext(r1, r2)
332 }
333
334
335 type inputReader struct {
336 r io.RuneReader
337 atEOT bool
338 pos int
339 }
340
341 func (i *inputReader) step(pos int) (rune, int) {
342 if !i.atEOT && pos != i.pos {
343 return endOfText, 0
344
345 }
346 r, w, err := i.r.ReadRune()
347 if err != nil {
348 i.atEOT = true
349 return endOfText, 0
350 }
351 i.pos += w
352 return r, w
353 }
354
355 func (i *inputReader) canCheckPrefix() bool {
356 return false
357 }
358
359 func (i *inputReader) hasPrefix(re *Regexp) bool {
360 return false
361 }
362
363 func (i *inputReader) index(re *Regexp, pos int) int {
364 return -1
365 }
366
367 func (i *inputReader) context(pos int) syntax.EmptyOp {
368 return 0
369 }
370
371
372
373
374 func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
375 return re.prefix, re.prefixComplete
376 }
377
378
379
380
381 func (re *Regexp) MatchReader(r io.RuneReader) bool {
382 return re.doExecute(r, nil, "", 0, 0) != nil
383 }
384
385
386
387 func (re *Regexp) MatchString(s string) bool {
388 return re.doExecute(nil, nil, s, 0, 0) != nil
389 }
390
391
392
393 func (re *Regexp) Match(b []byte) bool {
394 return re.doExecute(nil, b, "", 0, 0) != nil
395 }
396
397
398
399
400 func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
401 re, err := Compile(pattern)
402 if err != nil {
403 return false, err
404 }
405 return re.MatchReader(r), nil
406 }
407
408
409
410
411 func MatchString(pattern string, s string) (matched bool, err error) {
412 re, err := Compile(pattern)
413 if err != nil {
414 return false, err
415 }
416 return re.MatchString(s), nil
417 }
418
419
420
421
422 func Match(pattern string, b []byte) (matched bool, err error) {
423 re, err := Compile(pattern)
424 if err != nil {
425 return false, err
426 }
427 return re.Match(b), nil
428 }
429
430
431
432
433 func (re *Regexp) ReplaceAllString(src, repl string) string {
434 n := 2
435 if strings.Index(repl, "$") >= 0 {
436 n = 2 * (re.numSubexp + 1)
437 }
438 b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
439 return re.expand(dst, repl, nil, src, match)
440 })
441 return string(b)
442 }
443
444
445
446
447 func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
448 return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
449 return append(dst, repl...)
450 }))
451 }
452
453
454
455
456
457 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
458 b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
459 return append(dst, repl(src[match[0]:match[1]])...)
460 })
461 return string(b)
462 }
463
464 func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
465 lastMatchEnd := 0
466 searchPos := 0
467 var buf []byte
468 var endPos int
469 if bsrc != nil {
470 endPos = len(bsrc)
471 } else {
472 endPos = len(src)
473 }
474 for searchPos <= endPos {
475 a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
476 if len(a) == 0 {
477 break
478 }
479
480
481 if bsrc != nil {
482 buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
483 } else {
484 buf = append(buf, src[lastMatchEnd:a[0]]...)
485 }
486
487
488
489
490
491 if a[1] > lastMatchEnd || a[0] == 0 {
492 buf = repl(buf, a)
493 }
494 lastMatchEnd = a[1]
495
496
497 var width int
498 if bsrc != nil {
499 _, width = utf8.DecodeRune(bsrc[searchPos:])
500 } else {
501 _, width = utf8.DecodeRuneInString(src[searchPos:])
502 }
503 if searchPos+width > a[1] {
504 searchPos += width
505 } else if searchPos+1 > a[1] {
506
507
508 searchPos++
509 } else {
510 searchPos = a[1]
511 }
512 }
513
514
515 if bsrc != nil {
516 buf = append(buf, bsrc[lastMatchEnd:]...)
517 } else {
518 buf = append(buf, src[lastMatchEnd:]...)
519 }
520
521 return buf
522 }
523
524
525
526
527 func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
528 n := 2
529 if bytes.IndexByte(repl, '$') >= 0 {
530 n = 2 * (re.numSubexp + 1)
531 }
532 srepl := ""
533 b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
534 if len(srepl) != len(repl) {
535 srepl = string(repl)
536 }
537 return re.expand(dst, srepl, src, "", match)
538 })
539 return b
540 }
541
542
543
544
545 func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
546 return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
547 return append(dst, repl...)
548 })
549 }
550
551
552
553
554
555 func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
556 return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
557 return append(dst, repl(src[match[0]:match[1]])...)
558 })
559 }
560
561 var specialBytes = []byte(`\.+*?()|[]{}^$`)
562
563 func special(b byte) bool {
564 return bytes.IndexByte(specialBytes, b) >= 0
565 }
566
567
568
569
570 func QuoteMeta(s string) string {
571 b := make([]byte, 2*len(s))
572
573
574 j := 0
575 for i := 0; i < len(s); i++ {
576 if special(s[i]) {
577 b[j] = '\\'
578 j++
579 }
580 b[j] = s[i]
581 j++
582 }
583 return string(b[0:j])
584 }
585
586
587
588
589
590
591 func (re *Regexp) pad(a []int) []int {
592 if a == nil {
593
594 return nil
595 }
596 n := (1 + re.numSubexp) * 2
597 for len(a) < n {
598 a = append(a, -1)
599 }
600 return a
601 }
602
603
604 func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
605 var end int
606 if b == nil {
607 end = len(s)
608 } else {
609 end = len(b)
610 }
611
612 for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
613 matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
614 if len(matches) == 0 {
615 break
616 }
617
618 accept := true
619 if matches[1] == pos {
620
621 if matches[0] == prevMatchEnd {
622
623
624 accept = false
625 }
626 var width int
627
628 if b == nil {
629 _, width = utf8.DecodeRuneInString(s[pos:end])
630 } else {
631 _, width = utf8.DecodeRune(b[pos:end])
632 }
633 if width > 0 {
634 pos += width
635 } else {
636 pos = end + 1
637 }
638 } else {
639 pos = matches[1]
640 }
641 prevMatchEnd = matches[1]
642
643 if accept {
644 deliver(re.pad(matches))
645 i++
646 }
647 }
648 }
649
650
651
652 func (re *Regexp) Find(b []byte) []byte {
653 a := re.doExecute(nil, b, "", 0, 2)
654 if a == nil {
655 return nil
656 }
657 return b[a[0]:a[1]]
658 }
659
660
661
662
663
664 func (re *Regexp) FindIndex(b []byte) (loc []int) {
665 a := re.doExecute(nil, b, "", 0, 2)
666 if a == nil {
667 return nil
668 }
669 return a[0:2]
670 }
671
672
673
674
675
676
677 func (re *Regexp) FindString(s string) string {
678 a := re.doExecute(nil, nil, s, 0, 2)
679 if a == nil {
680 return ""
681 }
682 return s[a[0]:a[1]]
683 }
684
685
686
687
688
689 func (re *Regexp) FindStringIndex(s string) (loc []int) {
690 a := re.doExecute(nil, nil, s, 0, 2)
691 if a == nil {
692 return nil
693 }
694 return a[0:2]
695 }
696
697
698
699
700
701
702 func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
703 a := re.doExecute(r, nil, "", 0, 2)
704 if a == nil {
705 return nil
706 }
707 return a[0:2]
708 }
709
710
711
712
713
714
715 func (re *Regexp) FindSubmatch(b []byte) [][]byte {
716 a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
717 if a == nil {
718 return nil
719 }
720 ret := make([][]byte, 1+re.numSubexp)
721 for i := range ret {
722 if 2*i < len(a) && a[2*i] >= 0 {
723 ret[i] = b[a[2*i]:a[2*i+1]]
724 }
725 }
726 return ret
727 }
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746 func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
747 return re.expand(dst, string(template), src, "", match)
748 }
749
750
751
752
753 func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
754 return re.expand(dst, template, nil, src, match)
755 }
756
757 func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
758 for len(template) > 0 {
759 i := strings.Index(template, "$")
760 if i < 0 {
761 break
762 }
763 dst = append(dst, template[:i]...)
764 template = template[i:]
765 if len(template) > 1 && template[1] == '$' {
766
767 dst = append(dst, '$')
768 template = template[2:]
769 continue
770 }
771 name, num, rest, ok := extract(template)
772 if !ok {
773
774 dst = append(dst, '$')
775 template = template[1:]
776 continue
777 }
778 template = rest
779 if num >= 0 {
780 if 2*num+1 < len(match) && match[2*num] >= 0 {
781 if bsrc != nil {
782 dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
783 } else {
784 dst = append(dst, src[match[2*num]:match[2*num+1]]...)
785 }
786 }
787 } else {
788 for i, namei := range re.subexpNames {
789 if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
790 if bsrc != nil {
791 dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
792 } else {
793 dst = append(dst, src[match[2*i]:match[2*i+1]]...)
794 }
795 break
796 }
797 }
798 }
799 }
800 dst = append(dst, template...)
801 return dst
802 }
803
804
805
806 func extract(str string) (name string, num int, rest string, ok bool) {
807 if len(str) < 2 || str[0] != '$' {
808 return
809 }
810 brace := false
811 if str[1] == '{' {
812 brace = true
813 str = str[2:]
814 } else {
815 str = str[1:]
816 }
817 i := 0
818 for i < len(str) {
819 rune, size := utf8.DecodeRuneInString(str[i:])
820 if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
821 break
822 }
823 i += size
824 }
825 if i == 0 {
826
827 return
828 }
829 name = str[:i]
830 if brace {
831 if i >= len(str) || str[i] != '}' {
832
833 return
834 }
835 i++
836 }
837
838
839 num = 0
840 for i := 0; i < len(name); i++ {
841 if name[i] < '0' || '9' < name[i] || num >= 1e8 {
842 num = -1
843 break
844 }
845 num = num*10 + int(name[i]) - '0'
846 }
847
848 if name[0] == '0' && len(name) > 1 {
849 num = -1
850 }
851
852 rest = str[i:]
853 ok = true
854 return
855 }
856
857
858
859
860
861
862 func (re *Regexp) FindSubmatchIndex(b []byte) []int {
863 return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
864 }
865
866
867
868
869
870
871 func (re *Regexp) FindStringSubmatch(s string) []string {
872 a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
873 if a == nil {
874 return nil
875 }
876 ret := make([]string, 1+re.numSubexp)
877 for i := range ret {
878 if 2*i < len(a) && a[2*i] >= 0 {
879 ret[i] = s[a[2*i]:a[2*i+1]]
880 }
881 }
882 return ret
883 }
884
885
886
887
888
889
890 func (re *Regexp) FindStringSubmatchIndex(s string) []int {
891 return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
892 }
893
894
895
896
897
898
899 func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
900 return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
901 }
902
903 const startSize = 10
904
905
906
907
908
909 func (re *Regexp) FindAll(b []byte, n int) [][]byte {
910 if n < 0 {
911 n = len(b) + 1
912 }
913 result := make([][]byte, 0, startSize)
914 re.allMatches("", b, n, func(match []int) {
915 result = append(result, b[match[0]:match[1]])
916 })
917 if len(result) == 0 {
918 return nil
919 }
920 return result
921 }
922
923
924
925
926
927 func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
928 if n < 0 {
929 n = len(b) + 1
930 }
931 result := make([][]int, 0, startSize)
932 re.allMatches("", b, n, func(match []int) {
933 result = append(result, match[0:2])
934 })
935 if len(result) == 0 {
936 return nil
937 }
938 return result
939 }
940
941
942
943
944
945 func (re *Regexp) FindAllString(s string, n int) []string {
946 if n < 0 {
947 n = len(s) + 1
948 }
949 result := make([]string, 0, startSize)
950 re.allMatches(s, nil, n, func(match []int) {
951 result = append(result, s[match[0]:match[1]])
952 })
953 if len(result) == 0 {
954 return nil
955 }
956 return result
957 }
958
959
960
961
962
963 func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
964 if n < 0 {
965 n = len(s) + 1
966 }
967 result := make([][]int, 0, startSize)
968 re.allMatches(s, nil, n, func(match []int) {
969 result = append(result, match[0:2])
970 })
971 if len(result) == 0 {
972 return nil
973 }
974 return result
975 }
976
977
978
979
980
981 func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
982 if n < 0 {
983 n = len(b) + 1
984 }
985 result := make([][][]byte, 0, startSize)
986 re.allMatches("", b, n, func(match []int) {
987 slice := make([][]byte, len(match)/2)
988 for j := range slice {
989 if match[2*j] >= 0 {
990 slice[j] = b[match[2*j]:match[2*j+1]]
991 }
992 }
993 result = append(result, slice)
994 })
995 if len(result) == 0 {
996 return nil
997 }
998 return result
999 }
1000
1001
1002
1003
1004
1005 func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
1006 if n < 0 {
1007 n = len(b) + 1
1008 }
1009 result := make([][]int, 0, startSize)
1010 re.allMatches("", b, n, func(match []int) {
1011 result = append(result, match)
1012 })
1013 if len(result) == 0 {
1014 return nil
1015 }
1016 return result
1017 }
1018
1019
1020
1021
1022
1023 func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
1024 if n < 0 {
1025 n = len(s) + 1
1026 }
1027 result := make([][]string, 0, startSize)
1028 re.allMatches(s, nil, n, func(match []int) {
1029 slice := make([]string, len(match)/2)
1030 for j := range slice {
1031 if match[2*j] >= 0 {
1032 slice[j] = s[match[2*j]:match[2*j+1]]
1033 }
1034 }
1035 result = append(result, slice)
1036 })
1037 if len(result) == 0 {
1038 return nil
1039 }
1040 return result
1041 }
1042
1043
1044
1045
1046
1047
1048 func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
1049 if n < 0 {
1050 n = len(s) + 1
1051 }
1052 result := make([][]int, 0, startSize)
1053 re.allMatches(s, nil, n, func(match []int) {
1054 result = append(result, match)
1055 })
1056 if len(result) == 0 {
1057 return nil
1058 }
1059 return result
1060 }
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077 func (re *Regexp) Split(s string, n int) []string {
1078
1079 if n == 0 {
1080 return nil
1081 }
1082
1083 if len(re.expr) > 0 && len(s) == 0 {
1084 return []string{""}
1085 }
1086
1087 matches := re.FindAllStringIndex(s, n)
1088 strings := make([]string, 0, len(matches))
1089
1090 beg := 0
1091 end := 0
1092 for _, match := range matches {
1093 if n > 0 && len(strings) >= n-1 {
1094 break
1095 }
1096
1097 end = match[0]
1098 if match[1] != 0 {
1099 strings = append(strings, s[beg:end])
1100 }
1101 beg = match[1]
1102 }
1103
1104 if end != len(s) {
1105 strings = append(strings, s[beg:])
1106 }
1107
1108 return strings
1109 }
View as plain text