Source file
src/strings/strings.go
Documentation: strings
1
2
3
4
5
6
7
8 package strings
9
10 import (
11 "internal/bytealg"
12 "unicode"
13 "unicode/utf8"
14 )
15
16
17
18
19 func explode(s string, n int) []string {
20 l := utf8.RuneCountInString(s)
21 if n < 0 || n > l {
22 n = l
23 }
24 a := make([]string, n)
25 for i := 0; i < n-1; i++ {
26 ch, size := utf8.DecodeRuneInString(s)
27 a[i] = s[:size]
28 s = s[size:]
29 if ch == utf8.RuneError {
30 a[i] = string(utf8.RuneError)
31 }
32 }
33 if n > 0 {
34 a[n-1] = s
35 }
36 return a
37 }
38
39
40 const primeRK = 16777619
41
42
43
44 func hashStr(sep string) (uint32, uint32) {
45 hash := uint32(0)
46 for i := 0; i < len(sep); i++ {
47 hash = hash*primeRK + uint32(sep[i])
48 }
49 var pow, sq uint32 = 1, primeRK
50 for i := len(sep); i > 0; i >>= 1 {
51 if i&1 != 0 {
52 pow *= sq
53 }
54 sq *= sq
55 }
56 return hash, pow
57 }
58
59
60
61 func hashStrRev(sep string) (uint32, uint32) {
62 hash := uint32(0)
63 for i := len(sep) - 1; i >= 0; i-- {
64 hash = hash*primeRK + uint32(sep[i])
65 }
66 var pow, sq uint32 = 1, primeRK
67 for i := len(sep); i > 0; i >>= 1 {
68 if i&1 != 0 {
69 pow *= sq
70 }
71 sq *= sq
72 }
73 return hash, pow
74 }
75
76
77
78 func Count(s, substr string) int {
79
80 if len(substr) == 0 {
81 return utf8.RuneCountInString(s) + 1
82 }
83 if len(substr) == 1 {
84 return bytealg.CountString(s, substr[0])
85 }
86 n := 0
87 for {
88 i := Index(s, substr)
89 if i == -1 {
90 return n
91 }
92 n++
93 s = s[i+len(substr):]
94 }
95 }
96
97
98 func Contains(s, substr string) bool {
99 return Index(s, substr) >= 0
100 }
101
102
103 func ContainsAny(s, chars string) bool {
104 return IndexAny(s, chars) >= 0
105 }
106
107
108 func ContainsRune(s string, r rune) bool {
109 return IndexRune(s, r) >= 0
110 }
111
112
113 func LastIndex(s, substr string) int {
114 n := len(substr)
115 switch {
116 case n == 0:
117 return len(s)
118 case n == 1:
119 return LastIndexByte(s, substr[0])
120 case n == len(s):
121 if substr == s {
122 return 0
123 }
124 return -1
125 case n > len(s):
126 return -1
127 }
128
129 hashss, pow := hashStrRev(substr)
130 last := len(s) - n
131 var h uint32
132 for i := len(s) - 1; i >= last; i-- {
133 h = h*primeRK + uint32(s[i])
134 }
135 if h == hashss && s[last:] == substr {
136 return last
137 }
138 for i := last - 1; i >= 0; i-- {
139 h *= primeRK
140 h += uint32(s[i])
141 h -= pow * uint32(s[i+n])
142 if h == hashss && s[i:i+n] == substr {
143 return i
144 }
145 }
146 return -1
147 }
148
149
150
151
152
153 func IndexRune(s string, r rune) int {
154 switch {
155 case 0 <= r && r < utf8.RuneSelf:
156 return IndexByte(s, byte(r))
157 case r == utf8.RuneError:
158 for i, r := range s {
159 if r == utf8.RuneError {
160 return i
161 }
162 }
163 return -1
164 case !utf8.ValidRune(r):
165 return -1
166 default:
167 return Index(s, string(r))
168 }
169 }
170
171
172
173 func IndexAny(s, chars string) int {
174 if chars == "" {
175
176 return -1
177 }
178 if len(s) > 8 {
179 if as, isASCII := makeASCIISet(chars); isASCII {
180 for i := 0; i < len(s); i++ {
181 if as.contains(s[i]) {
182 return i
183 }
184 }
185 return -1
186 }
187 }
188 for i, c := range s {
189 for _, m := range chars {
190 if c == m {
191 return i
192 }
193 }
194 }
195 return -1
196 }
197
198
199
200
201 func LastIndexAny(s, chars string) int {
202 if chars == "" {
203
204 return -1
205 }
206 if len(s) > 8 {
207 if as, isASCII := makeASCIISet(chars); isASCII {
208 for i := len(s) - 1; i >= 0; i-- {
209 if as.contains(s[i]) {
210 return i
211 }
212 }
213 return -1
214 }
215 }
216 for i := len(s); i > 0; {
217 r, size := utf8.DecodeLastRuneInString(s[:i])
218 i -= size
219 for _, c := range chars {
220 if r == c {
221 return i
222 }
223 }
224 }
225 return -1
226 }
227
228
229 func LastIndexByte(s string, c byte) int {
230 for i := len(s) - 1; i >= 0; i-- {
231 if s[i] == c {
232 return i
233 }
234 }
235 return -1
236 }
237
238
239
240 func genSplit(s, sep string, sepSave, n int) []string {
241 if n == 0 {
242 return nil
243 }
244 if sep == "" {
245 return explode(s, n)
246 }
247 if n < 0 {
248 n = Count(s, sep) + 1
249 }
250
251 a := make([]string, n)
252 n--
253 i := 0
254 for i < n {
255 m := Index(s, sep)
256 if m < 0 {
257 break
258 }
259 a[i] = s[:m+sepSave]
260 s = s[m+len(sep):]
261 i++
262 }
263 a[i] = s
264 return a[:i+1]
265 }
266
267
268
269
270
271
272
273
274
275
276
277 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
278
279
280
281
282
283
284
285
286
287
288
289 func SplitAfterN(s, sep string, n int) []string {
290 return genSplit(s, sep, len(sep), n)
291 }
292
293
294
295
296
297
298
299
300
301
302
303 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
304
305
306
307
308
309
310
311
312
313
314
315 func SplitAfter(s, sep string) []string {
316 return genSplit(s, sep, len(sep), -1)
317 }
318
319 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
320
321
322
323
324 func Fields(s string) []string {
325
326
327 n := 0
328 wasSpace := 1
329
330 setBits := uint8(0)
331 for i := 0; i < len(s); i++ {
332 r := s[i]
333 setBits |= r
334 isSpace := int(asciiSpace[r])
335 n += wasSpace & ^isSpace
336 wasSpace = isSpace
337 }
338
339 if setBits < utf8.RuneSelf {
340 a := make([]string, n)
341 na := 0
342 fieldStart := 0
343 i := 0
344
345 for i < len(s) && asciiSpace[s[i]] != 0 {
346 i++
347 }
348 fieldStart = i
349 for i < len(s) {
350 if asciiSpace[s[i]] == 0 {
351 i++
352 continue
353 }
354 a[na] = s[fieldStart:i]
355 na++
356 i++
357
358 for i < len(s) && asciiSpace[s[i]] != 0 {
359 i++
360 }
361 fieldStart = i
362 }
363 if fieldStart < len(s) {
364 a[na] = s[fieldStart:]
365 }
366 return a
367 }
368
369
370 return FieldsFunc(s, unicode.IsSpace)
371 }
372
373
374
375
376
377
378 func FieldsFunc(s string, f func(rune) bool) []string {
379
380
381 type span struct {
382 start int
383 end int
384 }
385 spans := make([]span, 0, 32)
386
387
388 wasField := false
389 fromIndex := 0
390 for i, rune := range s {
391 if f(rune) {
392 if wasField {
393 spans = append(spans, span{start: fromIndex, end: i})
394 wasField = false
395 }
396 } else {
397 if !wasField {
398 fromIndex = i
399 wasField = true
400 }
401 }
402 }
403
404
405 if wasField {
406 spans = append(spans, span{fromIndex, len(s)})
407 }
408
409
410 a := make([]string, len(spans))
411 for i, span := range spans {
412 a[i] = s[span.start:span.end]
413 }
414
415 return a
416 }
417
418
419
420 func Join(a []string, sep string) string {
421 switch len(a) {
422 case 0:
423 return ""
424 case 1:
425 return a[0]
426 case 2:
427
428
429 return a[0] + sep + a[1]
430 case 3:
431
432
433 return a[0] + sep + a[1] + sep + a[2]
434 }
435 n := len(sep) * (len(a) - 1)
436 for i := 0; i < len(a); i++ {
437 n += len(a[i])
438 }
439
440 b := make([]byte, n)
441 bp := copy(b, a[0])
442 for _, s := range a[1:] {
443 bp += copy(b[bp:], sep)
444 bp += copy(b[bp:], s)
445 }
446 return string(b)
447 }
448
449
450 func HasPrefix(s, prefix string) bool {
451 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
452 }
453
454
455 func HasSuffix(s, suffix string) bool {
456 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
457 }
458
459
460
461
462 func Map(mapping func(rune) rune, s string) string {
463
464
465
466
467
468
469 var b []byte
470
471 var nbytes int
472
473 for i, c := range s {
474 r := mapping(c)
475 if r == c {
476 continue
477 }
478
479 b = make([]byte, len(s)+utf8.UTFMax)
480 nbytes = copy(b, s[:i])
481 if r >= 0 {
482 if r < utf8.RuneSelf {
483 b[nbytes] = byte(r)
484 nbytes++
485 } else {
486 nbytes += utf8.EncodeRune(b[nbytes:], r)
487 }
488 }
489
490 if c == utf8.RuneError {
491
492
493
494 _, w := utf8.DecodeRuneInString(s[i:])
495 i += w
496 } else {
497 i += utf8.RuneLen(c)
498 }
499
500 s = s[i:]
501 break
502 }
503
504 if b == nil {
505 return s
506 }
507
508 for _, c := range s {
509 r := mapping(c)
510
511
512 if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
513 b[nbytes] = byte(r)
514 nbytes++
515 continue
516 }
517
518
519 if r >= 0 {
520 if nbytes+utf8.UTFMax >= len(b) {
521
522 nb := make([]byte, 2*len(b))
523 copy(nb, b[:nbytes])
524 b = nb
525 }
526 nbytes += utf8.EncodeRune(b[nbytes:], r)
527 }
528 }
529
530 return string(b[:nbytes])
531 }
532
533
534
535
536
537 func Repeat(s string, count int) string {
538
539
540
541
542 if count < 0 {
543 panic("strings: negative Repeat count")
544 } else if count > 0 && len(s)*count/count != len(s) {
545 panic("strings: Repeat count causes overflow")
546 }
547
548 b := make([]byte, len(s)*count)
549 bp := copy(b, s)
550 for bp < len(b) {
551 copy(b[bp:], b[:bp])
552 bp *= 2
553 }
554 return string(b)
555 }
556
557
558 func ToUpper(s string) string {
559 isASCII, hasLower := true, false
560 for i := 0; i < len(s); i++ {
561 c := s[i]
562 if c >= utf8.RuneSelf {
563 isASCII = false
564 break
565 }
566 hasLower = hasLower || (c >= 'a' && c <= 'z')
567 }
568
569 if isASCII {
570 if !hasLower {
571 return s
572 }
573 b := make([]byte, len(s))
574 for i := 0; i < len(s); i++ {
575 c := s[i]
576 if c >= 'a' && c <= 'z' {
577 c -= 'a' - 'A'
578 }
579 b[i] = c
580 }
581 return string(b)
582 }
583 return Map(unicode.ToUpper, s)
584 }
585
586
587 func ToLower(s string) string {
588 isASCII, hasUpper := true, false
589 for i := 0; i < len(s); i++ {
590 c := s[i]
591 if c >= utf8.RuneSelf {
592 isASCII = false
593 break
594 }
595 hasUpper = hasUpper || (c >= 'A' && c <= 'Z')
596 }
597
598 if isASCII {
599 if !hasUpper {
600 return s
601 }
602 b := make([]byte, len(s))
603 for i := 0; i < len(s); i++ {
604 c := s[i]
605 if c >= 'A' && c <= 'Z' {
606 c += 'a' - 'A'
607 }
608 b[i] = c
609 }
610 return string(b)
611 }
612 return Map(unicode.ToLower, s)
613 }
614
615
616 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
617
618
619
620 func ToUpperSpecial(c unicode.SpecialCase, s string) string {
621 return Map(func(r rune) rune { return c.ToUpper(r) }, s)
622 }
623
624
625
626 func ToLowerSpecial(c unicode.SpecialCase, s string) string {
627 return Map(func(r rune) rune { return c.ToLower(r) }, s)
628 }
629
630
631
632 func ToTitleSpecial(c unicode.SpecialCase, s string) string {
633 return Map(func(r rune) rune { return c.ToTitle(r) }, s)
634 }
635
636
637
638 func isSeparator(r rune) bool {
639
640 if r <= 0x7F {
641 switch {
642 case '0' <= r && r <= '9':
643 return false
644 case 'a' <= r && r <= 'z':
645 return false
646 case 'A' <= r && r <= 'Z':
647 return false
648 case r == '_':
649 return false
650 }
651 return true
652 }
653
654 if unicode.IsLetter(r) || unicode.IsDigit(r) {
655 return false
656 }
657
658 return unicode.IsSpace(r)
659 }
660
661
662
663
664
665 func Title(s string) string {
666
667
668
669 prev := ' '
670 return Map(
671 func(r rune) rune {
672 if isSeparator(prev) {
673 prev = r
674 return unicode.ToTitle(r)
675 }
676 prev = r
677 return r
678 },
679 s)
680 }
681
682
683
684 func TrimLeftFunc(s string, f func(rune) bool) string {
685 i := indexFunc(s, f, false)
686 if i == -1 {
687 return ""
688 }
689 return s[i:]
690 }
691
692
693
694 func TrimRightFunc(s string, f func(rune) bool) string {
695 i := lastIndexFunc(s, f, false)
696 if i >= 0 && s[i] >= utf8.RuneSelf {
697 _, wid := utf8.DecodeRuneInString(s[i:])
698 i += wid
699 } else {
700 i++
701 }
702 return s[0:i]
703 }
704
705
706
707 func TrimFunc(s string, f func(rune) bool) string {
708 return TrimRightFunc(TrimLeftFunc(s, f), f)
709 }
710
711
712
713 func IndexFunc(s string, f func(rune) bool) int {
714 return indexFunc(s, f, true)
715 }
716
717
718
719 func LastIndexFunc(s string, f func(rune) bool) int {
720 return lastIndexFunc(s, f, true)
721 }
722
723
724
725
726 func indexFunc(s string, f func(rune) bool, truth bool) int {
727 for i, r := range s {
728 if f(r) == truth {
729 return i
730 }
731 }
732 return -1
733 }
734
735
736
737
738 func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
739 for i := len(s); i > 0; {
740 r, size := utf8.DecodeLastRuneInString(s[0:i])
741 i -= size
742 if f(r) == truth {
743 return i
744 }
745 }
746 return -1
747 }
748
749
750
751
752
753
754
755 type asciiSet [8]uint32
756
757
758
759 func makeASCIISet(chars string) (as asciiSet, ok bool) {
760 for i := 0; i < len(chars); i++ {
761 c := chars[i]
762 if c >= utf8.RuneSelf {
763 return as, false
764 }
765 as[c>>5] |= 1 << uint(c&31)
766 }
767 return as, true
768 }
769
770
771 func (as *asciiSet) contains(c byte) bool {
772 return (as[c>>5] & (1 << uint(c&31))) != 0
773 }
774
775 func makeCutsetFunc(cutset string) func(rune) bool {
776 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
777 return func(r rune) bool {
778 return r == rune(cutset[0])
779 }
780 }
781 if as, isASCII := makeASCIISet(cutset); isASCII {
782 return func(r rune) bool {
783 return r < utf8.RuneSelf && as.contains(byte(r))
784 }
785 }
786 return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
787 }
788
789
790
791 func Trim(s string, cutset string) string {
792 if s == "" || cutset == "" {
793 return s
794 }
795 return TrimFunc(s, makeCutsetFunc(cutset))
796 }
797
798
799
800
801
802 func TrimLeft(s string, cutset string) string {
803 if s == "" || cutset == "" {
804 return s
805 }
806 return TrimLeftFunc(s, makeCutsetFunc(cutset))
807 }
808
809
810
811
812
813 func TrimRight(s string, cutset string) string {
814 if s == "" || cutset == "" {
815 return s
816 }
817 return TrimRightFunc(s, makeCutsetFunc(cutset))
818 }
819
820
821
822 func TrimSpace(s string) string {
823 return TrimFunc(s, unicode.IsSpace)
824 }
825
826
827
828 func TrimPrefix(s, prefix string) string {
829 if HasPrefix(s, prefix) {
830 return s[len(prefix):]
831 }
832 return s
833 }
834
835
836
837 func TrimSuffix(s, suffix string) string {
838 if HasSuffix(s, suffix) {
839 return s[:len(s)-len(suffix)]
840 }
841 return s
842 }
843
844
845
846
847
848
849
850 func Replace(s, old, new string, n int) string {
851 if old == new || n == 0 {
852 return s
853 }
854
855
856 if m := Count(s, old); m == 0 {
857 return s
858 } else if n < 0 || m < n {
859 n = m
860 }
861
862
863 t := make([]byte, len(s)+n*(len(new)-len(old)))
864 w := 0
865 start := 0
866 for i := 0; i < n; i++ {
867 j := start
868 if len(old) == 0 {
869 if i > 0 {
870 _, wid := utf8.DecodeRuneInString(s[start:])
871 j += wid
872 }
873 } else {
874 j += Index(s[start:], old)
875 }
876 w += copy(t[w:], s[start:j])
877 w += copy(t[w:], new)
878 start = j + len(old)
879 }
880 w += copy(t[w:], s[start:])
881 return string(t[0:w])
882 }
883
884
885
886 func EqualFold(s, t string) bool {
887 for s != "" && t != "" {
888
889 var sr, tr rune
890 if s[0] < utf8.RuneSelf {
891 sr, s = rune(s[0]), s[1:]
892 } else {
893 r, size := utf8.DecodeRuneInString(s)
894 sr, s = r, s[size:]
895 }
896 if t[0] < utf8.RuneSelf {
897 tr, t = rune(t[0]), t[1:]
898 } else {
899 r, size := utf8.DecodeRuneInString(t)
900 tr, t = r, t[size:]
901 }
902
903
904
905
906 if tr == sr {
907 continue
908 }
909
910
911 if tr < sr {
912 tr, sr = sr, tr
913 }
914
915 if tr < utf8.RuneSelf {
916
917 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
918 continue
919 }
920 return false
921 }
922
923
924
925 r := unicode.SimpleFold(sr)
926 for r != sr && r < tr {
927 r = unicode.SimpleFold(r)
928 }
929 if r == tr {
930 continue
931 }
932 return false
933 }
934
935
936 return s == t
937 }
938
939
940 func Index(s, substr string) int {
941 n := len(substr)
942 switch {
943 case n == 0:
944 return 0
945 case n == 1:
946 return IndexByte(s, substr[0])
947 case n == len(s):
948 if substr == s {
949 return 0
950 }
951 return -1
952 case n > len(s):
953 return -1
954 case n <= bytealg.MaxLen:
955
956 if len(s) <= bytealg.MaxBruteForce {
957 return bytealg.IndexString(s, substr)
958 }
959 c := substr[0]
960 i := 0
961 t := s[:len(s)-n+1]
962 fails := 0
963 for i < len(t) {
964 if t[i] != c {
965
966
967 o := IndexByte(t[i:], c)
968 if o < 0 {
969 return -1
970 }
971 i += o
972 }
973 if s[i:i+n] == substr {
974 return i
975 }
976 fails++
977 i++
978
979 if fails > bytealg.Cutover(i) {
980 r := bytealg.IndexString(s[i:], substr)
981 if r >= 0 {
982 return r + i
983 }
984 return -1
985 }
986 }
987 return -1
988 }
989 c := substr[0]
990 i := 0
991 t := s[:len(s)-n+1]
992 fails := 0
993 for i < len(t) {
994 if t[i] != c {
995 o := IndexByte(t[i:], c)
996 if o < 0 {
997 return -1
998 }
999 i += o
1000 }
1001 if s[i:i+n] == substr {
1002 return i
1003 }
1004 i++
1005 fails++
1006 if fails >= 4+i>>4 && i < len(t) {
1007
1008 j := indexRabinKarp(s[i:], substr)
1009 if j < 0 {
1010 return -1
1011 }
1012 return i + j
1013 }
1014 }
1015 return -1
1016 }
1017
1018 func indexRabinKarp(s, substr string) int {
1019
1020 hashss, pow := hashStr(substr)
1021 n := len(substr)
1022 var h uint32
1023 for i := 0; i < n; i++ {
1024 h = h*primeRK + uint32(s[i])
1025 }
1026 if h == hashss && s[:n] == substr {
1027 return 0
1028 }
1029 for i := n; i < len(s); {
1030 h *= primeRK
1031 h += uint32(s[i])
1032 h -= pow * uint32(s[i-n])
1033 i++
1034 if h == hashss && s[i-n:i] == substr {
1035 return i - n
1036 }
1037 }
1038 return -1
1039
1040 }
1041
View as plain text