...
Run Format

Source file src/bytes/bytes.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package bytes implements functions for the manipulation of byte slices.
     6	// It is analogous to the facilities of the strings package.
     7	package bytes
     8	
     9	import (
    10		"unicode"
    11		"unicode/utf8"
    12	)
    13	
    14	func equalPortable(a, b []byte) bool {
    15		if len(a) != len(b) {
    16			return false
    17		}
    18		for i, c := range a {
    19			if c != b[i] {
    20				return false
    21			}
    22		}
    23		return true
    24	}
    25	
    26	// explode splits s into a slice of UTF-8 sequences, one per Unicode character (still slices of bytes),
    27	// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
    28	func explode(s []byte, n int) [][]byte {
    29		if n <= 0 {
    30			n = len(s)
    31		}
    32		a := make([][]byte, n)
    33		var size int
    34		na := 0
    35		for len(s) > 0 {
    36			if na+1 >= n {
    37				a[na] = s
    38				na++
    39				break
    40			}
    41			_, size = utf8.DecodeRune(s)
    42			a[na] = s[0:size]
    43			s = s[size:]
    44			na++
    45		}
    46		return a[0:na]
    47	}
    48	
    49	// Count counts the number of non-overlapping instances of sep in s.
    50	func Count(s, sep []byte) int {
    51		n := len(sep)
    52		if n == 0 {
    53			return utf8.RuneCount(s) + 1
    54		}
    55		if n > len(s) {
    56			return 0
    57		}
    58		count := 0
    59		c := sep[0]
    60		i := 0
    61		t := s[:len(s)-n+1]
    62		for i < len(t) {
    63			if t[i] != c {
    64				o := IndexByte(t[i:], c)
    65				if o < 0 {
    66					break
    67				}
    68				i += o
    69			}
    70			if n == 1 || Equal(s[i:i+n], sep) {
    71				count++
    72				i += n
    73				continue
    74			}
    75			i++
    76		}
    77		return count
    78	}
    79	
    80	// Contains reports whether subslice is within b.
    81	func Contains(b, subslice []byte) bool {
    82		return Index(b, subslice) != -1
    83	}
    84	
    85	// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
    86	func Index(s, sep []byte) int {
    87		n := len(sep)
    88		if n == 0 {
    89			return 0
    90		}
    91		if n > len(s) {
    92			return -1
    93		}
    94		c := sep[0]
    95		if n == 1 {
    96			return IndexByte(s, c)
    97		}
    98		i := 0
    99		t := s[:len(s)-n+1]
   100		for i < len(t) {
   101			if t[i] != c {
   102				o := IndexByte(t[i:], c)
   103				if o < 0 {
   104					break
   105				}
   106				i += o
   107			}
   108			if Equal(s[i:i+n], sep) {
   109				return i
   110			}
   111			i++
   112		}
   113		return -1
   114	}
   115	
   116	func indexBytePortable(s []byte, c byte) int {
   117		for i, b := range s {
   118			if b == c {
   119				return i
   120			}
   121		}
   122		return -1
   123	}
   124	
   125	// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
   126	func LastIndex(s, sep []byte) int {
   127		n := len(sep)
   128		if n == 0 {
   129			return len(s)
   130		}
   131		c := sep[0]
   132		for i := len(s) - n; i >= 0; i-- {
   133			if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
   134				return i
   135			}
   136		}
   137		return -1
   138	}
   139	
   140	// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
   141	// It returns the byte index of the first occurrence in s of the given rune.
   142	// It returns -1 if rune is not present in s.
   143	func IndexRune(s []byte, r rune) int {
   144		for i := 0; i < len(s); {
   145			r1, size := utf8.DecodeRune(s[i:])
   146			if r == r1 {
   147				return i
   148			}
   149			i += size
   150		}
   151		return -1
   152	}
   153	
   154	// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
   155	// It returns the byte index of the first occurrence in s of any of the Unicode
   156	// code points in chars.  It returns -1 if chars is empty or if there is no code
   157	// point in common.
   158	func IndexAny(s []byte, chars string) int {
   159		if len(chars) > 0 {
   160			var r rune
   161			var width int
   162			for i := 0; i < len(s); i += width {
   163				r = rune(s[i])
   164				if r < utf8.RuneSelf {
   165					width = 1
   166				} else {
   167					r, width = utf8.DecodeRune(s[i:])
   168				}
   169				for _, ch := range chars {
   170					if r == ch {
   171						return i
   172					}
   173				}
   174			}
   175		}
   176		return -1
   177	}
   178	
   179	// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
   180	// points.  It returns the byte index of the last occurrence in s of any of
   181	// the Unicode code points in chars.  It returns -1 if chars is empty or if
   182	// there is no code point in common.
   183	func LastIndexAny(s []byte, chars string) int {
   184		if len(chars) > 0 {
   185			for i := len(s); i > 0; {
   186				r, size := utf8.DecodeLastRune(s[0:i])
   187				i -= size
   188				for _, ch := range chars {
   189					if r == ch {
   190						return i
   191					}
   192				}
   193			}
   194		}
   195		return -1
   196	}
   197	
   198	// Generic split: splits after each instance of sep,
   199	// including sepSave bytes of sep in the subslices.
   200	func genSplit(s, sep []byte, sepSave, n int) [][]byte {
   201		if n == 0 {
   202			return nil
   203		}
   204		if len(sep) == 0 {
   205			return explode(s, n)
   206		}
   207		if n < 0 {
   208			n = Count(s, sep) + 1
   209		}
   210		c := sep[0]
   211		start := 0
   212		a := make([][]byte, n)
   213		na := 0
   214		for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
   215			if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
   216				a[na] = s[start : i+sepSave]
   217				na++
   218				start = i + len(sep)
   219				i += len(sep) - 1
   220			}
   221		}
   222		a[na] = s[start:]
   223		return a[0 : na+1]
   224	}
   225	
   226	// SplitN slices s into subslices separated by sep and returns a slice of
   227	// the subslices between those separators.
   228	// If sep is empty, SplitN splits after each UTF-8 sequence.
   229	// The count determines the number of subslices to return:
   230	//   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
   231	//   n == 0: the result is nil (zero subslices)
   232	//   n < 0: all subslices
   233	func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
   234	
   235	// SplitAfterN slices s into subslices after each instance of sep and
   236	// returns a slice of those subslices.
   237	// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
   238	// The count determines the number of subslices to return:
   239	//   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
   240	//   n == 0: the result is nil (zero subslices)
   241	//   n < 0: all subslices
   242	func SplitAfterN(s, sep []byte, n int) [][]byte {
   243		return genSplit(s, sep, len(sep), n)
   244	}
   245	
   246	// Split slices s into all subslices separated by sep and returns a slice of
   247	// the subslices between those separators.
   248	// If sep is empty, Split splits after each UTF-8 sequence.
   249	// It is equivalent to SplitN with a count of -1.
   250	func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
   251	
   252	// SplitAfter slices s into all subslices after each instance of sep and
   253	// returns a slice of those subslices.
   254	// If sep is empty, SplitAfter splits after each UTF-8 sequence.
   255	// It is equivalent to SplitAfterN with a count of -1.
   256	func SplitAfter(s, sep []byte) [][]byte {
   257		return genSplit(s, sep, len(sep), -1)
   258	}
   259	
   260	// Fields splits the slice s around each instance of one or more consecutive white space
   261	// characters, returning a slice of subslices of s or an empty list if s contains only white space.
   262	func Fields(s []byte) [][]byte {
   263		return FieldsFunc(s, unicode.IsSpace)
   264	}
   265	
   266	// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
   267	// It splits the slice s at each run of code points c satisfying f(c) and
   268	// returns a slice of subslices of s.  If all code points in s satisfy f(c), or
   269	// len(s) == 0, an empty slice is returned.
   270	// FieldsFunc makes no guarantees about the order in which it calls f(c).
   271	// If f does not return consistent results for a given c, FieldsFunc may crash.
   272	func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
   273		n := 0
   274		inField := false
   275		for i := 0; i < len(s); {
   276			r, size := utf8.DecodeRune(s[i:])
   277			wasInField := inField
   278			inField = !f(r)
   279			if inField && !wasInField {
   280				n++
   281			}
   282			i += size
   283		}
   284	
   285		a := make([][]byte, n)
   286		na := 0
   287		fieldStart := -1
   288		for i := 0; i <= len(s) && na < n; {
   289			r, size := utf8.DecodeRune(s[i:])
   290			if fieldStart < 0 && size > 0 && !f(r) {
   291				fieldStart = i
   292				i += size
   293				continue
   294			}
   295			if fieldStart >= 0 && (size == 0 || f(r)) {
   296				a[na] = s[fieldStart:i]
   297				na++
   298				fieldStart = -1
   299			}
   300			if size == 0 {
   301				break
   302			}
   303			i += size
   304		}
   305		return a[0:na]
   306	}
   307	
   308	// Join concatenates the elements of s to create a new byte slice. The separator
   309	// sep is placed between elements in the resulting slice.
   310	func Join(s [][]byte, sep []byte) []byte {
   311		if len(s) == 0 {
   312			return []byte{}
   313		}
   314		if len(s) == 1 {
   315			// Just return a copy.
   316			return append([]byte(nil), s[0]...)
   317		}
   318		n := len(sep) * (len(s) - 1)
   319		for _, v := range s {
   320			n += len(v)
   321		}
   322	
   323		b := make([]byte, n)
   324		bp := copy(b, s[0])
   325		for _, v := range s[1:] {
   326			bp += copy(b[bp:], sep)
   327			bp += copy(b[bp:], v)
   328		}
   329		return b
   330	}
   331	
   332	// HasPrefix tests whether the byte slice s begins with prefix.
   333	func HasPrefix(s, prefix []byte) bool {
   334		return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
   335	}
   336	
   337	// HasSuffix tests whether the byte slice s ends with suffix.
   338	func HasSuffix(s, suffix []byte) bool {
   339		return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
   340	}
   341	
   342	// Map returns a copy of the byte slice s with all its characters modified
   343	// according to the mapping function. If mapping returns a negative value, the character is
   344	// dropped from the string with no replacement.  The characters in s and the
   345	// output are interpreted as UTF-8-encoded Unicode code points.
   346	func Map(mapping func(r rune) rune, s []byte) []byte {
   347		// In the worst case, the slice can grow when mapped, making
   348		// things unpleasant.  But it's so rare we barge in assuming it's
   349		// fine.  It could also shrink but that falls out naturally.
   350		maxbytes := len(s) // length of b
   351		nbytes := 0        // number of bytes encoded in b
   352		b := make([]byte, maxbytes)
   353		for i := 0; i < len(s); {
   354			wid := 1
   355			r := rune(s[i])
   356			if r >= utf8.RuneSelf {
   357				r, wid = utf8.DecodeRune(s[i:])
   358			}
   359			r = mapping(r)
   360			if r >= 0 {
   361				rl := utf8.RuneLen(r)
   362				if rl < 0 {
   363					rl = len(string(utf8.RuneError))
   364				}
   365				if nbytes+rl > maxbytes {
   366					// Grow the buffer.
   367					maxbytes = maxbytes*2 + utf8.UTFMax
   368					nb := make([]byte, maxbytes)
   369					copy(nb, b[0:nbytes])
   370					b = nb
   371				}
   372				nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
   373			}
   374			i += wid
   375		}
   376		return b[0:nbytes]
   377	}
   378	
   379	// Repeat returns a new byte slice consisting of count copies of b.
   380	func Repeat(b []byte, count int) []byte {
   381		nb := make([]byte, len(b)*count)
   382		bp := copy(nb, b)
   383		for bp < len(nb) {
   384			copy(nb[bp:], nb[:bp])
   385			bp *= 2
   386		}
   387		return nb
   388	}
   389	
   390	// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
   391	func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
   392	
   393	// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
   394	func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
   395	
   396	// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
   397	func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
   398	
   399	// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
   400	// upper case, giving priority to the special casing rules.
   401	func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
   402		return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
   403	}
   404	
   405	// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
   406	// lower case, giving priority to the special casing rules.
   407	func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
   408		return Map(func(r rune) rune { return _case.ToLower(r) }, s)
   409	}
   410	
   411	// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
   412	// title case, giving priority to the special casing rules.
   413	func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
   414		return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
   415	}
   416	
   417	// isSeparator reports whether the rune could mark a word boundary.
   418	// TODO: update when package unicode captures more of the properties.
   419	func isSeparator(r rune) bool {
   420		// ASCII alphanumerics and underscore are not separators
   421		if r <= 0x7F {
   422			switch {
   423			case '0' <= r && r <= '9':
   424				return false
   425			case 'a' <= r && r <= 'z':
   426				return false
   427			case 'A' <= r && r <= 'Z':
   428				return false
   429			case r == '_':
   430				return false
   431			}
   432			return true
   433		}
   434		// Letters and digits are not separators
   435		if unicode.IsLetter(r) || unicode.IsDigit(r) {
   436			return false
   437		}
   438		// Otherwise, all we can do for now is treat spaces as separators.
   439		return unicode.IsSpace(r)
   440	}
   441	
   442	// Title returns a copy of s with all Unicode letters that begin words
   443	// mapped to their title case.
   444	//
   445	// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
   446	func Title(s []byte) []byte {
   447		// Use a closure here to remember state.
   448		// Hackish but effective. Depends on Map scanning in order and calling
   449		// the closure once per rune.
   450		prev := ' '
   451		return Map(
   452			func(r rune) rune {
   453				if isSeparator(prev) {
   454					prev = r
   455					return unicode.ToTitle(r)
   456				}
   457				prev = r
   458				return r
   459			},
   460			s)
   461	}
   462	
   463	// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded
   464	// Unicode code points c that satisfy f(c).
   465	func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
   466		i := indexFunc(s, f, false)
   467		if i == -1 {
   468			return nil
   469		}
   470		return s[i:]
   471	}
   472	
   473	// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8
   474	// encoded Unicode code points c that satisfy f(c).
   475	func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
   476		i := lastIndexFunc(s, f, false)
   477		if i >= 0 && s[i] >= utf8.RuneSelf {
   478			_, wid := utf8.DecodeRune(s[i:])
   479			i += wid
   480		} else {
   481			i++
   482		}
   483		return s[0:i]
   484	}
   485	
   486	// TrimFunc returns a subslice of s by slicing off all leading and trailing
   487	// UTF-8-encoded Unicode code points c that satisfy f(c).
   488	func TrimFunc(s []byte, f func(r rune) bool) []byte {
   489		return TrimRightFunc(TrimLeftFunc(s, f), f)
   490	}
   491	
   492	// TrimPrefix returns s without the provided leading prefix string.
   493	// If s doesn't start with prefix, s is returned unchanged.
   494	func TrimPrefix(s, prefix []byte) []byte {
   495		if HasPrefix(s, prefix) {
   496			return s[len(prefix):]
   497		}
   498		return s
   499	}
   500	
   501	// TrimSuffix returns s without the provided trailing suffix string.
   502	// If s doesn't end with suffix, s is returned unchanged.
   503	func TrimSuffix(s, suffix []byte) []byte {
   504		if HasSuffix(s, suffix) {
   505			return s[:len(s)-len(suffix)]
   506		}
   507		return s
   508	}
   509	
   510	// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
   511	// It returns the byte index in s of the first Unicode
   512	// code point satisfying f(c), or -1 if none do.
   513	func IndexFunc(s []byte, f func(r rune) bool) int {
   514		return indexFunc(s, f, true)
   515	}
   516	
   517	// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
   518	// It returns the byte index in s of the last Unicode
   519	// code point satisfying f(c), or -1 if none do.
   520	func LastIndexFunc(s []byte, f func(r rune) bool) int {
   521		return lastIndexFunc(s, f, true)
   522	}
   523	
   524	// indexFunc is the same as IndexFunc except that if
   525	// truth==false, the sense of the predicate function is
   526	// inverted.
   527	func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
   528		start := 0
   529		for start < len(s) {
   530			wid := 1
   531			r := rune(s[start])
   532			if r >= utf8.RuneSelf {
   533				r, wid = utf8.DecodeRune(s[start:])
   534			}
   535			if f(r) == truth {
   536				return start
   537			}
   538			start += wid
   539		}
   540		return -1
   541	}
   542	
   543	// lastIndexFunc is the same as LastIndexFunc except that if
   544	// truth==false, the sense of the predicate function is
   545	// inverted.
   546	func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
   547		for i := len(s); i > 0; {
   548			r, size := rune(s[i-1]), 1
   549			if r >= utf8.RuneSelf {
   550				r, size = utf8.DecodeLastRune(s[0:i])
   551			}
   552			i -= size
   553			if f(r) == truth {
   554				return i
   555			}
   556		}
   557		return -1
   558	}
   559	
   560	func makeCutsetFunc(cutset string) func(r rune) bool {
   561		return func(r rune) bool {
   562			for _, c := range cutset {
   563				if c == r {
   564					return true
   565				}
   566			}
   567			return false
   568		}
   569	}
   570	
   571	// Trim returns a subslice of s by slicing off all leading and
   572	// trailing UTF-8-encoded Unicode code points contained in cutset.
   573	func Trim(s []byte, cutset string) []byte {
   574		return TrimFunc(s, makeCutsetFunc(cutset))
   575	}
   576	
   577	// TrimLeft returns a subslice of s by slicing off all leading
   578	// UTF-8-encoded Unicode code points contained in cutset.
   579	func TrimLeft(s []byte, cutset string) []byte {
   580		return TrimLeftFunc(s, makeCutsetFunc(cutset))
   581	}
   582	
   583	// TrimRight returns a subslice of s by slicing off all trailing
   584	// UTF-8-encoded Unicode code points that are contained in cutset.
   585	func TrimRight(s []byte, cutset string) []byte {
   586		return TrimRightFunc(s, makeCutsetFunc(cutset))
   587	}
   588	
   589	// TrimSpace returns a subslice of s by slicing off all leading and
   590	// trailing white space, as defined by Unicode.
   591	func TrimSpace(s []byte) []byte {
   592		return TrimFunc(s, unicode.IsSpace)
   593	}
   594	
   595	// Runes returns a slice of runes (Unicode code points) equivalent to s.
   596	func Runes(s []byte) []rune {
   597		t := make([]rune, utf8.RuneCount(s))
   598		i := 0
   599		for len(s) > 0 {
   600			r, l := utf8.DecodeRune(s)
   601			t[i] = r
   602			i++
   603			s = s[l:]
   604		}
   605		return t
   606	}
   607	
   608	// Replace returns a copy of the slice s with the first n
   609	// non-overlapping instances of old replaced by new.
   610	// If old is empty, it matches at the beginning of the slice
   611	// and after each UTF-8 sequence, yielding up to k+1 replacements
   612	// for a k-rune slice.
   613	// If n < 0, there is no limit on the number of replacements.
   614	func Replace(s, old, new []byte, n int) []byte {
   615		m := 0
   616		if n != 0 {
   617			// Compute number of replacements.
   618			m = Count(s, old)
   619		}
   620		if m == 0 {
   621			// Just return a copy.
   622			return append([]byte(nil), s...)
   623		}
   624		if n < 0 || m < n {
   625			n = m
   626		}
   627	
   628		// Apply replacements to buffer.
   629		t := make([]byte, len(s)+n*(len(new)-len(old)))
   630		w := 0
   631		start := 0
   632		for i := 0; i < n; i++ {
   633			j := start
   634			if len(old) == 0 {
   635				if i > 0 {
   636					_, wid := utf8.DecodeRune(s[start:])
   637					j += wid
   638				}
   639			} else {
   640				j += Index(s[start:], old)
   641			}
   642			w += copy(t[w:], s[start:j])
   643			w += copy(t[w:], new)
   644			start = j + len(old)
   645		}
   646		w += copy(t[w:], s[start:])
   647		return t[0:w]
   648	}
   649	
   650	// EqualFold reports whether s and t, interpreted as UTF-8 strings,
   651	// are equal under Unicode case-folding.
   652	func EqualFold(s, t []byte) bool {
   653		for len(s) != 0 && len(t) != 0 {
   654			// Extract first rune from each.
   655			var sr, tr rune
   656			if s[0] < utf8.RuneSelf {
   657				sr, s = rune(s[0]), s[1:]
   658			} else {
   659				r, size := utf8.DecodeRune(s)
   660				sr, s = r, s[size:]
   661			}
   662			if t[0] < utf8.RuneSelf {
   663				tr, t = rune(t[0]), t[1:]
   664			} else {
   665				r, size := utf8.DecodeRune(t)
   666				tr, t = r, t[size:]
   667			}
   668	
   669			// If they match, keep going; if not, return false.
   670	
   671			// Easy case.
   672			if tr == sr {
   673				continue
   674			}
   675	
   676			// Make sr < tr to simplify what follows.
   677			if tr < sr {
   678				tr, sr = sr, tr
   679			}
   680			// Fast check for ASCII.
   681			if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
   682				// ASCII, and sr is upper case.  tr must be lower case.
   683				if tr == sr+'a'-'A' {
   684					continue
   685				}
   686				return false
   687			}
   688	
   689			// General case.  SimpleFold(x) returns the next equivalent rune > x
   690			// or wraps around to smaller values.
   691			r := unicode.SimpleFold(sr)
   692			for r != sr && r < tr {
   693				r = unicode.SimpleFold(r)
   694			}
   695			if r == tr {
   696				continue
   697			}
   698			return false
   699		}
   700	
   701		// One string is empty.  Are both?
   702		return len(s) == len(t)
   703	}
   704	

View as plain text