Source file
src/bytes/bytes.go
Documentation: bytes
1
2
3
4
5
6
7 package bytes
8
9 import (
10 "internal/bytealg"
11 "unicode"
12 "unicode/utf8"
13 )
14
// equalPortable reports whether a and b have the same length and hold the
// same bytes at every position. A nil slice compares equal to an empty one.
func equalPortable(a, b []byte) bool {
	size := len(a)
	if size != len(b) {
		return false
	}
	for idx := 0; idx < size; idx++ {
		if a[idx] != b[idx] {
			return false
		}
	}
	return true
}
26
27
28
// explode splits s into a slice of UTF-8 sequences, one per decoded rune
// (invalid bytes are consumed one at a time, as utf8.DecodeRune reports them).
// At most n pieces are produced; when the limit is reached the final piece
// holds the whole unsplit remainder. n <= 0 means "no limit" and is treated as
// len(s). Every single-rune piece has its capacity clipped to its length.
func explode(s []byte, n int) [][]byte {
	if n <= 0 {
		n = len(s)
	}
	// The result never holds more than n pieces, so append never reallocates
	// and the returned slice keeps capacity n.
	pieces := make([][]byte, 0, n)
	for rest := s; len(rest) > 0; {
		if len(pieces)+1 >= n {
			// Last permitted slot: store everything that is left.
			pieces = append(pieces, rest)
			break
		}
		_, width := utf8.DecodeRune(rest)
		pieces = append(pieces, rest[:width:width])
		rest = rest[width:]
	}
	return pieces
}
49
50
51
52 func Count(s, sep []byte) int {
53
54 if len(sep) == 0 {
55 return utf8.RuneCount(s) + 1
56 }
57 if len(sep) == 1 {
58 return bytealg.Count(s, sep[0])
59 }
60 n := 0
61 for {
62 i := Index(s, sep)
63 if i == -1 {
64 return n
65 }
66 n++
67 s = s[i+len(sep):]
68 }
69 }
70
71
72 func Contains(b, subslice []byte) bool {
73 return Index(b, subslice) != -1
74 }
75
76
77 func ContainsAny(b []byte, chars string) bool {
78 return IndexAny(b, chars) >= 0
79 }
80
81
82 func ContainsRune(b []byte, r rune) bool {
83 return IndexRune(b, r) >= 0
84 }
85
// indexBytePortable returns the index of the first byte in s equal to c,
// or -1 when c does not occur in s.
func indexBytePortable(s []byte, c byte) int {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
94
95
96 func LastIndex(s, sep []byte) int {
97 n := len(sep)
98 if n == 0 {
99 return len(s)
100 }
101 c := sep[0]
102 for i := len(s) - n; i >= 0; i-- {
103 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
104 return i
105 }
106 }
107 return -1
108 }
109
110
// LastIndexByte returns the index of the last byte in s equal to c,
// or -1 when c does not occur in s.
func LastIndexByte(s []byte, c byte) int {
	for end := len(s); end > 0; end-- {
		if s[end-1] == c {
			return end - 1
		}
	}
	return -1
}
119
120
121
122
123
124
125 func IndexRune(s []byte, r rune) int {
126 switch {
127 case 0 <= r && r < utf8.RuneSelf:
128 return IndexByte(s, byte(r))
129 case r == utf8.RuneError:
130 for i := 0; i < len(s); {
131 r1, n := utf8.DecodeRune(s[i:])
132 if r1 == utf8.RuneError {
133 return i
134 }
135 i += n
136 }
137 return -1
138 case !utf8.ValidRune(r):
139 return -1
140 default:
141 var b [utf8.UTFMax]byte
142 n := utf8.EncodeRune(b[:], r)
143 return Index(s, b[:n])
144 }
145 }
146
147
148
149
150
151 func IndexAny(s []byte, chars string) int {
152 if chars == "" {
153
154 return -1
155 }
156 if len(s) > 8 {
157 if as, isASCII := makeASCIISet(chars); isASCII {
158 for i, c := range s {
159 if as.contains(c) {
160 return i
161 }
162 }
163 return -1
164 }
165 }
166 var width int
167 for i := 0; i < len(s); i += width {
168 r := rune(s[i])
169 if r < utf8.RuneSelf {
170 width = 1
171 } else {
172 r, width = utf8.DecodeRune(s[i:])
173 }
174 for _, ch := range chars {
175 if r == ch {
176 return i
177 }
178 }
179 }
180 return -1
181 }
182
183
184
185
186
187 func LastIndexAny(s []byte, chars string) int {
188 if chars == "" {
189
190 return -1
191 }
192 if len(s) > 8 {
193 if as, isASCII := makeASCIISet(chars); isASCII {
194 for i := len(s) - 1; i >= 0; i-- {
195 if as.contains(s[i]) {
196 return i
197 }
198 }
199 return -1
200 }
201 }
202 for i := len(s); i > 0; {
203 r, size := utf8.DecodeLastRune(s[:i])
204 i -= size
205 for _, c := range chars {
206 if r == c {
207 return i
208 }
209 }
210 }
211 return -1
212 }
213
214
215
216 func genSplit(s, sep []byte, sepSave, n int) [][]byte {
217 if n == 0 {
218 return nil
219 }
220 if len(sep) == 0 {
221 return explode(s, n)
222 }
223 if n < 0 {
224 n = Count(s, sep) + 1
225 }
226
227 a := make([][]byte, n)
228 n--
229 i := 0
230 for i < n {
231 m := Index(s, sep)
232 if m < 0 {
233 break
234 }
235 a[i] = s[: m+sepSave : m+sepSave]
236 s = s[m+len(sep):]
237 i++
238 }
239 a[i] = s
240 return a[:i+1]
241 }
242
243
244
245
246
247
248
249
250 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
251
252
253
254
255
256
257
258
259 func SplitAfterN(s, sep []byte, n int) [][]byte {
260 return genSplit(s, sep, len(sep), n)
261 }
262
263
264
265
266
267 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
268
269
270
271
272
273 func SplitAfter(s, sep []byte) [][]byte {
274 return genSplit(s, sep, len(sep), -1)
275 }
276
// asciiSpace is a lookup table indexed by byte value: the entry is 1 for the
// six ASCII white space bytes listed below and 0 for every other byte.
var asciiSpace = [256]uint8{
	' ':  1,
	'\t': 1,
	'\n': 1,
	'\v': 1,
	'\f': 1,
	'\r': 1,
}
278
279
280
281
282
283 func Fields(s []byte) [][]byte {
284
285
286 n := 0
287 wasSpace := 1
288
289 setBits := uint8(0)
290 for i := 0; i < len(s); i++ {
291 r := s[i]
292 setBits |= r
293 isSpace := int(asciiSpace[r])
294 n += wasSpace & ^isSpace
295 wasSpace = isSpace
296 }
297
298 if setBits >= utf8.RuneSelf {
299
300 return FieldsFunc(s, unicode.IsSpace)
301 }
302
303
304 a := make([][]byte, n)
305 na := 0
306 fieldStart := 0
307 i := 0
308
309 for i < len(s) && asciiSpace[s[i]] != 0 {
310 i++
311 }
312 fieldStart = i
313 for i < len(s) {
314 if asciiSpace[s[i]] == 0 {
315 i++
316 continue
317 }
318 a[na] = s[fieldStart:i:i]
319 na++
320 i++
321
322 for i < len(s) && asciiSpace[s[i]] != 0 {
323 i++
324 }
325 fieldStart = i
326 }
327 if fieldStart < len(s) {
328 a[na] = s[fieldStart:len(s):len(s)]
329 }
330 return a
331 }
332
333
334
335
336
337
338
// FieldsFunc interprets s as UTF-8 and splits it at every run of code points
// c for which f(c) is true, returning the non-empty pieces in between. f is
// called exactly once per decoded rune, in order. The result is an empty,
// non-nil slice when there are no fields. Each returned field has its
// capacity clipped to its length.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
	// bounds records the half-open byte range [lo, hi) of one field.
	type bounds struct {
		lo, hi int
	}
	found := make([]bounds, 0, 32)

	// Locate the field boundaries first; the fields themselves are sliced
	// out afterwards, once their number is known.
	inField := false
	lo := 0
	for pos := 0; pos < len(s); {
		r, width := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[pos:])
		}
		isSep := f(r)
		switch {
		case isSep && inField:
			found = append(found, bounds{lo: lo, hi: pos})
			inField = false
		case !isSep && !inField:
			lo = pos
			inField = true
		}
		pos += width
	}

	// A field still open at the end of input runs to len(s).
	if inField {
		found = append(found, bounds{lo: lo, hi: len(s)})
	}

	out := make([][]byte, len(found))
	for k := range found {
		b := found[k]
		out[k] = s[b.lo:b.hi:b.hi]
	}
	return out
}
384
385
386
// Join concatenates the elements of s into a newly allocated byte slice,
// placing sep between adjacent elements. An empty s yields an empty, non-nil
// slice; a single-element s yields a copy of that element.
func Join(s [][]byte, sep []byte) []byte {
	switch len(s) {
	case 0:
		return []byte{}
	case 1:
		// Copy so the caller never aliases the input element.
		return append([]byte(nil), s[0]...)
	}

	// Compute the exact output size up front: all elements plus the
	// separators between them.
	total := len(sep) * (len(s) - 1)
	for i := range s {
		total += len(s[i])
	}

	out := make([]byte, total)
	w := copy(out, s[0])
	for i := 1; i < len(s); i++ {
		w += copy(out[w:], sep)
		w += copy(out[w:], s[i])
	}
	return out
}
408
409
410 func HasPrefix(s, prefix []byte) bool {
411 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
412 }
413
414
415 func HasSuffix(s, suffix []byte) bool {
416 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
417 }
418
419
420
421
422
// Map returns a new byte slice built by decoding s as UTF-8, passing every
// rune through mapping, and encoding the results. Runes for which mapping
// returns a negative value are dropped from the output.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output starts out as large as the input and is grown on demand,
	// since a mapped rune may need more bytes than the rune it replaces.
	capacity := len(s) // current length of out
	used := 0          // bytes of out holding encoded output
	out := make([]byte, capacity)
	for pos := 0; pos < len(s); {
		r, width := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[pos:])
		}
		pos += width

		mapped := mapping(r)
		if mapped < 0 {
			continue
		}
		need := utf8.RuneLen(mapped)
		if need < 0 {
			// Not encodable: EncodeRune will write utf8.RuneError instead.
			need = len(string(utf8.RuneError))
		}
		if used+need > capacity {
			// Grow the buffer.
			capacity = capacity*2 + utf8.UTFMax
			grown := make([]byte, capacity)
			copy(grown, out[:used])
			out = grown
		}
		used += utf8.EncodeRune(out[used:capacity], mapped)
	}
	return out[:used]
}
455
456
457
458
459
// Repeat returns a new byte slice consisting of count copies of b.
//
// It panics if count is negative or if len(b)*count overflows.
func Repeat(b []byte, count int) []byte {
	if count < 0 {
		panic("bytes: negative Repeat count")
	}
	// Multiplying and dividing back detects overflow of len(b)*count.
	if count > 0 && len(b)*count/count != len(b) {
		panic("bytes: Repeat count causes overflow")
	}

	out := make([]byte, len(b)*count)
	// Seed with one copy of b, then repeatedly double the filled prefix.
	for filled := copy(out, b); filled < len(out); filled *= 2 {
		copy(out[filled:], out[:filled])
	}
	return out
}
479
480
481 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
482
483
484 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
485
486
487 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
488
489
490
491 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
492 return Map(func(r rune) rune { return c.ToUpper(r) }, s)
493 }
494
495
496
497 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
498 return Map(func(r rune) rune { return c.ToLower(r) }, s)
499 }
500
501
502
503 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
504 return Map(func(r rune) rune { return c.ToTitle(r) }, s)
505 }
506
507
508
// isSeparator reports whether r counts as a word separator.
//
// For r <= 0x7F everything except ASCII letters, ASCII digits and underscore
// is a separator. Above that range, letters and digits are not separators,
// and any other rune is a separator only if unicode.IsSpace says so.
func isSeparator(r rune) bool {
	if r <= 0x7F {
		wordChar := r == '_' ||
			('0' <= r && r <= '9') ||
			('a' <= r && r <= 'z') ||
			('A' <= r && r <= 'Z')
		return !wordChar
	}
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	// Neither letter nor digit: only white space separates words.
	return unicode.IsSpace(r)
}
531
532
533
534
535
536 func Title(s []byte) []byte {
537
538
539
540 prev := ' '
541 return Map(
542 func(r rune) rune {
543 if isSeparator(prev) {
544 prev = r
545 return unicode.ToTitle(r)
546 }
547 prev = r
548 return r
549 },
550 s)
551 }
552
553
554
555 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
556 i := indexFunc(s, f, false)
557 if i == -1 {
558 return nil
559 }
560 return s[i:]
561 }
562
563
564
565 func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
566 i := lastIndexFunc(s, f, false)
567 if i >= 0 && s[i] >= utf8.RuneSelf {
568 _, wid := utf8.DecodeRune(s[i:])
569 i += wid
570 } else {
571 i++
572 }
573 return s[0:i]
574 }
575
576
577
578 func TrimFunc(s []byte, f func(r rune) bool) []byte {
579 return TrimRightFunc(TrimLeftFunc(s, f), f)
580 }
581
582
583
584 func TrimPrefix(s, prefix []byte) []byte {
585 if HasPrefix(s, prefix) {
586 return s[len(prefix):]
587 }
588 return s
589 }
590
591
592
593 func TrimSuffix(s, suffix []byte) []byte {
594 if HasSuffix(s, suffix) {
595 return s[:len(s)-len(suffix)]
596 }
597 return s
598 }
599
600
601
602
603 func IndexFunc(s []byte, f func(r rune) bool) int {
604 return indexFunc(s, f, true)
605 }
606
607
608
609
610 func LastIndexFunc(s []byte, f func(r rune) bool) int {
611 return lastIndexFunc(s, f, true)
612 }
613
614
615
616
// indexFunc scans s rune by rune from the front and returns the byte index
// of the first rune r with f(r) == truth, or -1 if no rune qualifies.
// Passing truth == false therefore inverts the sense of the predicate.
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	for pos := 0; pos < len(s); {
		r, width := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[pos:])
		}
		if f(r) == truth {
			return pos
		}
		pos += width
	}
	return -1
}
632
633
634
635
// lastIndexFunc scans s rune by rune from the back and returns the byte
// index of the last rune r with f(r) == truth, or -1 if no rune qualifies.
// Passing truth == false therefore inverts the sense of the predicate.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	end := len(s)
	for end > 0 {
		var r rune
		width := 1
		if last := s[end-1]; last < utf8.RuneSelf {
			r = rune(last)
		} else {
			r, width = utf8.DecodeLastRune(s[:end])
		}
		end -= width
		if f(r) == truth {
			return end
		}
	}
	return -1
}
649
650
651
652
653
654
655
// asciiSet is a 256-bit bitmap laid out as eight 32-bit words. A byte value
// c is a member when bit c&31 of word c>>5 is set. makeASCIISet only ever
// sets bits for bytes below utf8.RuneSelf.
type asciiSet [8]uint32

// makeASCIISet builds the set of bytes occurring in chars. ok is true only
// if every byte of chars is below utf8.RuneSelf; on the first byte that is
// not, it stops and returns ok == false together with the partially built set.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for _, c := range []byte(chars) {
		if c >= utf8.RuneSelf {
			return as, false
		}
		word, bit := c>>5, uint(c&31)
		as[word] |= 1 << bit
	}
	return as, true
}

// contains reports whether the bit for byte c is set in the set.
func (as *asciiSet) contains(c byte) bool {
	mask := uint32(1) << uint(c&31)
	return as[c>>5]&mask != 0
}
675
676 func makeCutsetFunc(cutset string) func(r rune) bool {
677 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
678 return func(r rune) bool {
679 return r == rune(cutset[0])
680 }
681 }
682 if as, isASCII := makeASCIISet(cutset); isASCII {
683 return func(r rune) bool {
684 return r < utf8.RuneSelf && as.contains(byte(r))
685 }
686 }
687 return func(r rune) bool {
688 for _, c := range cutset {
689 if c == r {
690 return true
691 }
692 }
693 return false
694 }
695 }
696
697
698
699 func Trim(s []byte, cutset string) []byte {
700 return TrimFunc(s, makeCutsetFunc(cutset))
701 }
702
703
704
705 func TrimLeft(s []byte, cutset string) []byte {
706 return TrimLeftFunc(s, makeCutsetFunc(cutset))
707 }
708
709
710
711 func TrimRight(s []byte, cutset string) []byte {
712 return TrimRightFunc(s, makeCutsetFunc(cutset))
713 }
714
715
716
717 func TrimSpace(s []byte) []byte {
718 return TrimFunc(s, unicode.IsSpace)
719 }
720
721
722
// Runes decodes s as UTF-8 and returns the resulting runes in a new slice
// whose length is utf8.RuneCount(s).
func Runes(s []byte) []rune {
	out := make([]rune, utf8.RuneCount(s))
	next := 0
	for pos := 0; pos < len(s); next++ {
		r, width := utf8.DecodeRune(s[pos:])
		out[next] = r
		pos += width
	}
	return out
}
734
735
736
737
738
739
740
741 func Replace(s, old, new []byte, n int) []byte {
742 m := 0
743 if n != 0 {
744
745 m = Count(s, old)
746 }
747 if m == 0 {
748
749 return append([]byte(nil), s...)
750 }
751 if n < 0 || m < n {
752 n = m
753 }
754
755
756 t := make([]byte, len(s)+n*(len(new)-len(old)))
757 w := 0
758 start := 0
759 for i := 0; i < n; i++ {
760 j := start
761 if len(old) == 0 {
762 if i > 0 {
763 _, wid := utf8.DecodeRune(s[start:])
764 j += wid
765 }
766 } else {
767 j += Index(s[start:], old)
768 }
769 w += copy(t[w:], s[start:j])
770 w += copy(t[w:], new)
771 start = j + len(old)
772 }
773 w += copy(t[w:], s[start:])
774 return t[0:w]
775 }
776
777
778
// EqualFold reports whether s and t, interpreted as UTF-8, are equal when
// compared rune by rune with case folding: two runes match if they are
// identical, if they are an ASCII upper/lower pair, or if one is reachable
// from the other through unicode.SimpleFold.
func EqualFold(s, t []byte) bool {
	for len(s) > 0 && len(t) > 0 {
		// Pull one rune off the front of each input, with a fast path for
		// single-byte runes.
		var a, b rune
		if c := s[0]; c < utf8.RuneSelf {
			a, s = rune(c), s[1:]
		} else {
			r, width := utf8.DecodeRune(s)
			a, s = r, s[width:]
		}
		if c := t[0]; c < utf8.RuneSelf {
			b, t = rune(c), t[1:]
		} else {
			r, width := utf8.DecodeRune(t)
			b, t = r, t[width:]
		}

		if a == b {
			continue
		}

		// Order the pair so that lo < hi.
		lo, hi := a, b
		if hi < lo {
			lo, hi = hi, lo
		}

		if hi < utf8.RuneSelf {
			// Both ASCII: they match only as an upper/lower letter pair.
			if 'A' <= lo && lo <= 'Z' && hi == lo+'a'-'A' {
				continue
			}
			return false
		}

		// Step through lo's fold orbit until it wraps around to lo or
		// reaches a rune that is not below hi.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// Equal only if both inputs were consumed completely.
	return len(s) == len(t)
}
831
832
833 func Index(s, sep []byte) int {
834 n := len(sep)
835 switch {
836 case n == 0:
837 return 0
838 case n == 1:
839 return IndexByte(s, sep[0])
840 case n == len(s):
841 if Equal(sep, s) {
842 return 0
843 }
844 return -1
845 case n > len(s):
846 return -1
847 case n <= bytealg.MaxLen:
848
849 if len(s) <= bytealg.MaxBruteForce {
850 return bytealg.Index(s, sep)
851 }
852 c := sep[0]
853 i := 0
854 t := s[:len(s)-n+1]
855 fails := 0
856 for i < len(t) {
857 if t[i] != c {
858
859
860 o := IndexByte(t[i:], c)
861 if o < 0 {
862 return -1
863 }
864 i += o
865 }
866 if Equal(s[i:i+n], sep) {
867 return i
868 }
869 fails++
870 i++
871
872 if fails > bytealg.Cutover(i) {
873 r := bytealg.Index(s[i:], sep)
874 if r >= 0 {
875 return r + i
876 }
877 return -1
878 }
879 }
880 return -1
881 }
882 c := sep[0]
883 i := 0
884 fails := 0
885 t := s[:len(s)-n+1]
886 for i < len(t) {
887 if t[i] != c {
888 o := IndexByte(t[i:], c)
889 if o < 0 {
890 break
891 }
892 i += o
893 }
894 if Equal(s[i:i+n], sep) {
895 return i
896 }
897 i++
898 fails++
899 if fails >= 4+i>>4 && i < len(t) {
900
901
902
903
904
905
906
907
908 j := indexRabinKarp(s[i:], sep)
909 if j < 0 {
910 return -1
911 }
912 return i + j
913 }
914 }
915 return -1
916 }
917
918 func indexRabinKarp(s, sep []byte) int {
919
920 hashsep, pow := hashStr(sep)
921 n := len(sep)
922 var h uint32
923 for i := 0; i < n; i++ {
924 h = h*primeRK + uint32(s[i])
925 }
926 if h == hashsep && Equal(s[:n], sep) {
927 return 0
928 }
929 for i := n; i < len(s); {
930 h *= primeRK
931 h += uint32(s[i])
932 h -= pow * uint32(s[i-n])
933 i++
934 if h == hashsep && Equal(s[i-n:i], sep) {
935 return i - n
936 }
937 }
938 return -1
939 }
940
941
// primeRK is the multiplier used by the rolling hash in hashStr and
// indexRabinKarp.
const primeRK = 16777619

// hashStr returns the polynomial hash of sep with multiplier primeRK, and
// primeRK raised to the power len(sep). Both values are computed in uint32
// arithmetic and therefore wrap modulo 2^32.
func hashStr(sep []byte) (uint32, uint32) {
	var hash uint32
	for _, c := range sep {
		hash = hash*primeRK + uint32(c)
	}

	// Exponentiation by squaring over the bits of len(sep).
	pow, sq := uint32(1), uint32(primeRK)
	for bits := len(sep); bits > 0; bits >>= 1 {
		if bits&1 == 1 {
			pow *= sq
		}
		sq *= sq
	}
	return hash, pow
}
960
View as plain text