...
Run Format

Source file src/pkg/bytes/bytes.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package bytes implements functions for the manipulation of byte slices.
     6	// It is analogous to the facilities of the strings package.
     7	package bytes
     8	
     9	import (
    10		"unicode"
    11		"unicode/utf8"
    12	)
    13	
    14	func equalPortable(a, b []byte) bool {
    15		if len(a) != len(b) {
    16			return false
    17		}
    18		for i, c := range a {
    19			if c != b[i] {
    20				return false
    21			}
    22		}
    23		return true
    24	}
    25	
    26	// explode splits s into a slice of UTF-8 sequences, one per Unicode character (still slices of bytes),
    27	// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
    28	func explode(s []byte, n int) [][]byte {
    29		if n <= 0 {
    30			n = len(s)
    31		}
    32		a := make([][]byte, n)
    33		var size int
    34		na := 0
    35		for len(s) > 0 {
    36			if na+1 >= n {
    37				a[na] = s
    38				na++
    39				break
    40			}
    41			_, size = utf8.DecodeRune(s)
    42			a[na] = s[0:size]
    43			s = s[size:]
    44			na++
    45		}
    46		return a[0:na]
    47	}
    48	
    49	// Count counts the number of non-overlapping instances of sep in s.
    50	func Count(s, sep []byte) int {
    51		n := len(sep)
    52		if n == 0 {
    53			return utf8.RuneCount(s) + 1
    54		}
    55		if n > len(s) {
    56			return 0
    57		}
    58		count := 0
    59		c := sep[0]
    60		i := 0
    61		t := s[:len(s)-n+1]
    62		for i < len(t) {
    63			if t[i] != c {
    64				o := IndexByte(t[i:], c)
    65				if o < 0 {
    66					break
    67				}
    68				i += o
    69			}
    70			if n == 1 || Equal(s[i:i+n], sep) {
    71				count++
    72				i += n
    73				continue
    74			}
    75			i++
    76		}
    77		return count
    78	}
    79	
    80	// Contains reports whether subslice is within b.
    81	func Contains(b, subslice []byte) bool {
    82		return Index(b, subslice) != -1
    83	}
    84	
    85	// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
    86	func Index(s, sep []byte) int {
    87		n := len(sep)
    88		if n == 0 {
    89			return 0
    90		}
    91		if n > len(s) {
    92			return -1
    93		}
    94		c := sep[0]
    95		if n == 1 {
    96			return IndexByte(s, c)
    97		}
    98		i := 0
    99		t := s[:len(s)-n+1]
   100		for i < len(t) {
   101			if t[i] != c {
   102				o := IndexByte(t[i:], c)
   103				if o < 0 {
   104					break
   105				}
   106				i += o
   107			}
   108			if Equal(s[i:i+n], sep) {
   109				return i
   110			}
   111			i++
   112		}
   113		return -1
   114	}
   115	
   116	func indexBytePortable(s []byte, c byte) int {
   117		for i, b := range s {
   118			if b == c {
   119				return i
   120			}
   121		}
   122		return -1
   123	}
   124	
   125	// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
   126	func LastIndex(s, sep []byte) int {
   127		n := len(sep)
   128		if n == 0 {
   129			return len(s)
   130		}
   131		c := sep[0]
   132		for i := len(s) - n; i >= 0; i-- {
   133			if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
   134				return i
   135			}
   136		}
   137		return -1
   138	}
   139	
   140	// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
   141	// It returns the byte index of the first occurrence in s of the given rune.
   142	// It returns -1 if rune is not present in s.
   143	func IndexRune(s []byte, r rune) int {
   144		for i := 0; i < len(s); {
   145			r1, size := utf8.DecodeRune(s[i:])
   146			if r == r1 {
   147				return i
   148			}
   149			i += size
   150		}
   151		return -1
   152	}
   153	
   154	// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
   155	// It returns the byte index of the first occurrence in s of any of the Unicode
   156	// code points in chars.  It returns -1 if chars is empty or if there is no code
   157	// point in common.
   158	func IndexAny(s []byte, chars string) int {
   159		if len(chars) > 0 {
   160			var r rune
   161			var width int
   162			for i := 0; i < len(s); i += width {
   163				r = rune(s[i])
   164				if r < utf8.RuneSelf {
   165					width = 1
   166				} else {
   167					r, width = utf8.DecodeRune(s[i:])
   168				}
   169				for _, ch := range chars {
   170					if r == ch {
   171						return i
   172					}
   173				}
   174			}
   175		}
   176		return -1
   177	}
   178	
   179	// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
   180	// points.  It returns the byte index of the last occurrence in s of any of
   181	// the Unicode code points in chars.  It returns -1 if chars is empty or if
   182	// there is no code point in common.
   183	func LastIndexAny(s []byte, chars string) int {
   184		if len(chars) > 0 {
   185			for i := len(s); i > 0; {
   186				r, size := utf8.DecodeLastRune(s[0:i])
   187				i -= size
   188				for _, ch := range chars {
   189					if r == ch {
   190						return i
   191					}
   192				}
   193			}
   194		}
   195		return -1
   196	}
   197	
   198	// Generic split: splits after each instance of sep,
   199	// including sepSave bytes of sep in the subslices.
   200	func genSplit(s, sep []byte, sepSave, n int) [][]byte {
   201		if n == 0 {
   202			return nil
   203		}
   204		if len(sep) == 0 {
   205			return explode(s, n)
   206		}
   207		if n < 0 {
   208			n = Count(s, sep) + 1
   209		}
   210		c := sep[0]
   211		start := 0
   212		a := make([][]byte, n)
   213		na := 0
   214		for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
   215			if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
   216				a[na] = s[start : i+sepSave]
   217				na++
   218				start = i + len(sep)
   219				i += len(sep) - 1
   220			}
   221		}
   222		a[na] = s[start:]
   223		return a[0 : na+1]
   224	}
   225	
   226	// SplitN slices s into subslices separated by sep and returns a slice of
   227	// the subslices between those separators.
   228	// If sep is empty, SplitN splits after each UTF-8 sequence.
   229	// The count determines the number of subslices to return:
   230	//   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
   231	//   n == 0: the result is nil (zero subslices)
   232	//   n < 0: all subslices
   233	func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
   234	
   235	// SplitAfterN slices s into subslices after each instance of sep and
   236	// returns a slice of those subslices.
   237	// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
   238	// The count determines the number of subslices to return:
   239	//   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
   240	//   n == 0: the result is nil (zero subslices)
   241	//   n < 0: all subslices
   242	func SplitAfterN(s, sep []byte, n int) [][]byte {
   243		return genSplit(s, sep, len(sep), n)
   244	}
   245	
   246	// Split slices s into all subslices separated by sep and returns a slice of
   247	// the subslices between those separators.
   248	// If sep is empty, Split splits after each UTF-8 sequence.
   249	// It is equivalent to SplitN with a count of -1.
   250	func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
   251	
   252	// SplitAfter slices s into all subslices after each instance of sep and
   253	// returns a slice of those subslices.
   254	// If sep is empty, SplitAfter splits after each UTF-8 sequence.
   255	// It is equivalent to SplitAfterN with a count of -1.
   256	func SplitAfter(s, sep []byte) [][]byte {
   257		return genSplit(s, sep, len(sep), -1)
   258	}
   259	
   260	// Fields splits the slice s around each instance of one or more consecutive white space
   261	// characters, returning a slice of subslices of s or an empty list if s contains only white space.
   262	func Fields(s []byte) [][]byte {
   263		return FieldsFunc(s, unicode.IsSpace)
   264	}
   265	
   266	// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
   267	// It splits the slice s at each run of code points c satisfying f(c) and
   268	// returns a slice of subslices of s.  If all code points in s satisfy f(c), or
   269	// len(s) == 0, an empty slice is returned.
   270	func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
   271		n := 0
   272		inField := false
   273		for i := 0; i < len(s); {
   274			r, size := utf8.DecodeRune(s[i:])
   275			wasInField := inField
   276			inField = !f(r)
   277			if inField && !wasInField {
   278				n++
   279			}
   280			i += size
   281		}
   282	
   283		a := make([][]byte, n)
   284		na := 0
   285		fieldStart := -1
   286		for i := 0; i <= len(s) && na < n; {
   287			r, size := utf8.DecodeRune(s[i:])
   288			if fieldStart < 0 && size > 0 && !f(r) {
   289				fieldStart = i
   290				i += size
   291				continue
   292			}
   293			if fieldStart >= 0 && (size == 0 || f(r)) {
   294				a[na] = s[fieldStart:i]
   295				na++
   296				fieldStart = -1
   297			}
   298			if size == 0 {
   299				break
   300			}
   301			i += size
   302		}
   303		return a[0:na]
   304	}
   305	
   306	// Join concatenates the elements of s to create a new byte slice. The separator
   307	// sep is placed between elements in the resulting slice.
   308	func Join(s [][]byte, sep []byte) []byte {
   309		if len(s) == 0 {
   310			return []byte{}
   311		}
   312		if len(s) == 1 {
   313			// Just return a copy.
   314			return append([]byte(nil), s[0]...)
   315		}
   316		n := len(sep) * (len(s) - 1)
   317		for _, v := range s {
   318			n += len(v)
   319		}
   320	
   321		b := make([]byte, n)
   322		bp := copy(b, s[0])
   323		for _, v := range s[1:] {
   324			bp += copy(b[bp:], sep)
   325			bp += copy(b[bp:], v)
   326		}
   327		return b
   328	}
   329	
   330	// HasPrefix tests whether the byte slice s begins with prefix.
   331	func HasPrefix(s, prefix []byte) bool {
   332		return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
   333	}
   334	
   335	// HasSuffix tests whether the byte slice s ends with suffix.
   336	func HasSuffix(s, suffix []byte) bool {
   337		return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
   338	}
   339	
   340	// Map returns a copy of the byte slice s with all its characters modified
   341	// according to the mapping function. If mapping returns a negative value, the character is
   342	// dropped from the string with no replacement.  The characters in s and the
   343	// output are interpreted as UTF-8-encoded Unicode code points.
   344	func Map(mapping func(r rune) rune, s []byte) []byte {
   345		// In the worst case, the slice can grow when mapped, making
   346		// things unpleasant.  But it's so rare we barge in assuming it's
   347		// fine.  It could also shrink but that falls out naturally.
   348		maxbytes := len(s) // length of b
   349		nbytes := 0        // number of bytes encoded in b
   350		b := make([]byte, maxbytes)
   351		for i := 0; i < len(s); {
   352			wid := 1
   353			r := rune(s[i])
   354			if r >= utf8.RuneSelf {
   355				r, wid = utf8.DecodeRune(s[i:])
   356			}
   357			r = mapping(r)
   358			if r >= 0 {
   359				rl := utf8.RuneLen(r)
   360				if rl < 0 {
   361					rl = len(string(utf8.RuneError))
   362				}
   363				if nbytes+rl > maxbytes {
   364					// Grow the buffer.
   365					maxbytes = maxbytes*2 + utf8.UTFMax
   366					nb := make([]byte, maxbytes)
   367					copy(nb, b[0:nbytes])
   368					b = nb
   369				}
   370				nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
   371			}
   372			i += wid
   373		}
   374		return b[0:nbytes]
   375	}
   376	
   377	// Repeat returns a new byte slice consisting of count copies of b.
   378	func Repeat(b []byte, count int) []byte {
   379		nb := make([]byte, len(b)*count)
   380		bp := 0
   381		for i := 0; i < count; i++ {
   382			bp += copy(nb[bp:], b)
   383		}
   384		return nb
   385	}
   386	
   387	// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
   388	func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
   389	
   390	// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
   391	func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
   392	
   393	// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
   394	func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
   395	
   396	// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
   397	// upper case, giving priority to the special casing rules.
   398	func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
   399		return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
   400	}
   401	
   402	// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
   403	// lower case, giving priority to the special casing rules.
   404	func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
   405		return Map(func(r rune) rune { return _case.ToLower(r) }, s)
   406	}
   407	
   408	// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
   409	// title case, giving priority to the special casing rules.
   410	func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
   411		return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
   412	}
   413	
   414	// isSeparator reports whether the rune could mark a word boundary.
   415	// TODO: update when package unicode captures more of the properties.
   416	func isSeparator(r rune) bool {
   417		// ASCII alphanumerics and underscore are not separators
   418		if r <= 0x7F {
   419			switch {
   420			case '0' <= r && r <= '9':
   421				return false
   422			case 'a' <= r && r <= 'z':
   423				return false
   424			case 'A' <= r && r <= 'Z':
   425				return false
   426			case r == '_':
   427				return false
   428			}
   429			return true
   430		}
   431		// Letters and digits are not separators
   432		if unicode.IsLetter(r) || unicode.IsDigit(r) {
   433			return false
   434		}
   435		// Otherwise, all we can do for now is treat spaces as separators.
   436		return unicode.IsSpace(r)
   437	}
   438	
   439	// Title returns a copy of s with all Unicode letters that begin words
   440	// mapped to their title case.
   441	//
   442	// BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
   443	func Title(s []byte) []byte {
   444		// Use a closure here to remember state.
   445		// Hackish but effective. Depends on Map scanning in order and calling
   446		// the closure once per rune.
   447		prev := ' '
   448		return Map(
   449			func(r rune) rune {
   450				if isSeparator(prev) {
   451					prev = r
   452					return unicode.ToTitle(r)
   453				}
   454				prev = r
   455				return r
   456			},
   457			s)
   458	}
   459	
   460	// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded
   461	// Unicode code points c that satisfy f(c).
   462	func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
   463		i := indexFunc(s, f, false)
   464		if i == -1 {
   465			return nil
   466		}
   467		return s[i:]
   468	}
   469	
   470	// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8
   471	// encoded Unicode code points c that satisfy f(c).
   472	func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
   473		i := lastIndexFunc(s, f, false)
   474		if i >= 0 && s[i] >= utf8.RuneSelf {
   475			_, wid := utf8.DecodeRune(s[i:])
   476			i += wid
   477		} else {
   478			i++
   479		}
   480		return s[0:i]
   481	}
   482	
   483	// TrimFunc returns a subslice of s by slicing off all leading and trailing
   484	// UTF-8-encoded Unicode code points c that satisfy f(c).
   485	func TrimFunc(s []byte, f func(r rune) bool) []byte {
   486		return TrimRightFunc(TrimLeftFunc(s, f), f)
   487	}
   488	
   489	// TrimPrefix returns s without the provided leading prefix string.
   490	// If s doesn't start with prefix, s is returned unchanged.
   491	func TrimPrefix(s, prefix []byte) []byte {
   492		if HasPrefix(s, prefix) {
   493			return s[len(prefix):]
   494		}
   495		return s
   496	}
   497	
   498	// TrimSuffix returns s without the provided trailing suffix string.
   499	// If s doesn't end with suffix, s is returned unchanged.
   500	func TrimSuffix(s, suffix []byte) []byte {
   501		if HasSuffix(s, suffix) {
   502			return s[:len(s)-len(suffix)]
   503		}
   504		return s
   505	}
   506	
   507	// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
   508	// It returns the byte index in s of the first Unicode
   509	// code point satisfying f(c), or -1 if none do.
   510	func IndexFunc(s []byte, f func(r rune) bool) int {
   511		return indexFunc(s, f, true)
   512	}
   513	
   514	// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
   515	// It returns the byte index in s of the last Unicode
   516	// code point satisfying f(c), or -1 if none do.
   517	func LastIndexFunc(s []byte, f func(r rune) bool) int {
   518		return lastIndexFunc(s, f, true)
   519	}
   520	
   521	// indexFunc is the same as IndexFunc except that if
   522	// truth==false, the sense of the predicate function is
   523	// inverted.
   524	func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
   525		start := 0
   526		for start < len(s) {
   527			wid := 1
   528			r := rune(s[start])
   529			if r >= utf8.RuneSelf {
   530				r, wid = utf8.DecodeRune(s[start:])
   531			}
   532			if f(r) == truth {
   533				return start
   534			}
   535			start += wid
   536		}
   537		return -1
   538	}
   539	
   540	// lastIndexFunc is the same as LastIndexFunc except that if
   541	// truth==false, the sense of the predicate function is
   542	// inverted.
   543	func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
   544		for i := len(s); i > 0; {
   545			r, size := rune(s[i-1]), 1
   546			if r >= utf8.RuneSelf {
   547				r, size = utf8.DecodeLastRune(s[0:i])
   548			}
   549			i -= size
   550			if f(r) == truth {
   551				return i
   552			}
   553		}
   554		return -1
   555	}
   556	
   557	func makeCutsetFunc(cutset string) func(r rune) bool {
   558		return func(r rune) bool {
   559			for _, c := range cutset {
   560				if c == r {
   561					return true
   562				}
   563			}
   564			return false
   565		}
   566	}
   567	
   568	// Trim returns a subslice of s by slicing off all leading and
   569	// trailing UTF-8-encoded Unicode code points contained in cutset.
   570	func Trim(s []byte, cutset string) []byte {
   571		return TrimFunc(s, makeCutsetFunc(cutset))
   572	}
   573	
   574	// TrimLeft returns a subslice of s by slicing off all leading
   575	// UTF-8-encoded Unicode code points contained in cutset.
   576	func TrimLeft(s []byte, cutset string) []byte {
   577		return TrimLeftFunc(s, makeCutsetFunc(cutset))
   578	}
   579	
   580	// TrimRight returns a subslice of s by slicing off all trailing
   581	// UTF-8-encoded Unicode code points that are contained in cutset.
   582	func TrimRight(s []byte, cutset string) []byte {
   583		return TrimRightFunc(s, makeCutsetFunc(cutset))
   584	}
   585	
   586	// TrimSpace returns a subslice of s by slicing off all leading and
   587	// trailing white space, as defined by Unicode.
   588	func TrimSpace(s []byte) []byte {
   589		return TrimFunc(s, unicode.IsSpace)
   590	}
   591	
   592	// Runes returns a slice of runes (Unicode code points) equivalent to s.
   593	func Runes(s []byte) []rune {
   594		t := make([]rune, utf8.RuneCount(s))
   595		i := 0
   596		for len(s) > 0 {
   597			r, l := utf8.DecodeRune(s)
   598			t[i] = r
   599			i++
   600			s = s[l:]
   601		}
   602		return t
   603	}
   604	
   605	// Replace returns a copy of the slice s with the first n
   606	// non-overlapping instances of old replaced by new.
   607	// If n < 0, there is no limit on the number of replacements.
   608	func Replace(s, old, new []byte, n int) []byte {
   609		m := 0
   610		if n != 0 {
   611			// Compute number of replacements.
   612			m = Count(s, old)
   613		}
   614		if m == 0 {
   615			// Just return a copy.
   616			return append([]byte(nil), s...)
   617		}
   618		if n < 0 || m < n {
   619			n = m
   620		}
   621	
   622		// Apply replacements to buffer.
   623		t := make([]byte, len(s)+n*(len(new)-len(old)))
   624		w := 0
   625		start := 0
   626		for i := 0; i < n; i++ {
   627			j := start
   628			if len(old) == 0 {
   629				if i > 0 {
   630					_, wid := utf8.DecodeRune(s[start:])
   631					j += wid
   632				}
   633			} else {
   634				j += Index(s[start:], old)
   635			}
   636			w += copy(t[w:], s[start:j])
   637			w += copy(t[w:], new)
   638			start = j + len(old)
   639		}
   640		w += copy(t[w:], s[start:])
   641		return t[0:w]
   642	}
   643	
   644	// EqualFold reports whether s and t, interpreted as UTF-8 strings,
   645	// are equal under Unicode case-folding.
   646	func EqualFold(s, t []byte) bool {
   647		for len(s) != 0 && len(t) != 0 {
   648			// Extract first rune from each.
   649			var sr, tr rune
   650			if s[0] < utf8.RuneSelf {
   651				sr, s = rune(s[0]), s[1:]
   652			} else {
   653				r, size := utf8.DecodeRune(s)
   654				sr, s = r, s[size:]
   655			}
   656			if t[0] < utf8.RuneSelf {
   657				tr, t = rune(t[0]), t[1:]
   658			} else {
   659				r, size := utf8.DecodeRune(t)
   660				tr, t = r, t[size:]
   661			}
   662	
   663			// If they match, keep going; if not, return false.
   664	
   665			// Easy case.
   666			if tr == sr {
   667				continue
   668			}
   669	
   670			// Make sr < tr to simplify what follows.
   671			if tr < sr {
   672				tr, sr = sr, tr
   673			}
   674			// Fast check for ASCII.
   675			if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
   676				// ASCII, and sr is upper case.  tr must be lower case.
   677				if tr == sr+'a'-'A' {
   678					continue
   679				}
   680				return false
   681			}
   682	
   683			// General case.  SimpleFold(x) returns the next equivalent rune > x
   684			// or wraps around to smaller values.
   685			r := unicode.SimpleFold(sr)
   686			for r != sr && r < tr {
   687				r = unicode.SimpleFold(r)
   688			}
   689			if r == tr {
   690				continue
   691			}
   692			return false
   693		}
   694	
   695		// One string is empty.  Are both?
   696		return len(s) == len(t)
   697	}
   698	

View as plain text