...

Source file src/bytes/bytes.go

Documentation: bytes

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package bytes implements functions for the manipulation of byte slices.
     6  // It is analogous to the facilities of the strings package.
     7  package bytes
     8  
     9  import (
    10  	"internal/bytealg"
    11  	"unicode"
    12  	"unicode/utf8"
    13  )
    14  
    15  // Equal returns a boolean reporting whether a and b
    16  // are the same length and contain the same bytes.
    17  // A nil argument is equivalent to an empty slice.
    18  func Equal(a, b []byte) bool {
    19  	return bytealg.Equal(a, b)
    20  }
    21  
    22  func equalPortable(a, b []byte) bool {
    23  	if len(a) != len(b) {
    24  		return false
    25  	}
    26  	for i, c := range a {
    27  		if c != b[i] {
    28  			return false
    29  		}
    30  	}
    31  	return true
    32  }
    33  
    34  // Compare returns an integer comparing two byte slices lexicographically.
    35  // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
    36  // A nil argument is equivalent to an empty slice.
    37  func Compare(a, b []byte) int {
    38  	return bytealg.Compare(a, b)
    39  }
    40  
    41  // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
    42  // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
    43  func explode(s []byte, n int) [][]byte {
    44  	if n <= 0 {
    45  		n = len(s)
    46  	}
    47  	a := make([][]byte, n)
    48  	var size int
    49  	na := 0
    50  	for len(s) > 0 {
    51  		if na+1 >= n {
    52  			a[na] = s
    53  			na++
    54  			break
    55  		}
    56  		_, size = utf8.DecodeRune(s)
    57  		a[na] = s[0:size:size]
    58  		s = s[size:]
    59  		na++
    60  	}
    61  	return a[0:na]
    62  }
    63  
    64  // Count counts the number of non-overlapping instances of sep in s.
    65  // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
    66  func Count(s, sep []byte) int {
    67  	// special case
    68  	if len(sep) == 0 {
    69  		return utf8.RuneCount(s) + 1
    70  	}
    71  	if len(sep) == 1 {
    72  		return bytealg.Count(s, sep[0])
    73  	}
    74  	n := 0
    75  	for {
    76  		i := Index(s, sep)
    77  		if i == -1 {
    78  			return n
    79  		}
    80  		n++
    81  		s = s[i+len(sep):]
    82  	}
    83  }
    84  
    85  // Contains reports whether subslice is within b.
    86  func Contains(b, subslice []byte) bool {
    87  	return Index(b, subslice) != -1
    88  }
    89  
    90  // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
    91  func ContainsAny(b []byte, chars string) bool {
    92  	return IndexAny(b, chars) >= 0
    93  }
    94  
    95  // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
    96  func ContainsRune(b []byte, r rune) bool {
    97  	return IndexRune(b, r) >= 0
    98  }
    99  
   100  // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b.
   101  func IndexByte(b []byte, c byte) int {
   102  	return bytealg.IndexByte(b, c)
   103  }
   104  
   105  func indexBytePortable(s []byte, c byte) int {
   106  	for i, b := range s {
   107  		if b == c {
   108  			return i
   109  		}
   110  	}
   111  	return -1
   112  }
   113  
   114  // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
   115  func LastIndex(s, sep []byte) int {
   116  	n := len(sep)
   117  	if n == 0 {
   118  		return len(s)
   119  	}
   120  	c := sep[0]
   121  	for i := len(s) - n; i >= 0; i-- {
   122  		if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
   123  			return i
   124  		}
   125  	}
   126  	return -1
   127  }
   128  
   129  // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
   130  func LastIndexByte(s []byte, c byte) int {
   131  	for i := len(s) - 1; i >= 0; i-- {
   132  		if s[i] == c {
   133  			return i
   134  		}
   135  	}
   136  	return -1
   137  }
   138  
   139  // IndexRune interprets s as a sequence of UTF-8-encoded code points.
   140  // It returns the byte index of the first occurrence in s of the given rune.
   141  // It returns -1 if rune is not present in s.
   142  // If r is utf8.RuneError, it returns the first instance of any
   143  // invalid UTF-8 byte sequence.
   144  func IndexRune(s []byte, r rune) int {
   145  	switch {
   146  	case 0 <= r && r < utf8.RuneSelf:
   147  		return IndexByte(s, byte(r))
   148  	case r == utf8.RuneError:
   149  		for i := 0; i < len(s); {
   150  			r1, n := utf8.DecodeRune(s[i:])
   151  			if r1 == utf8.RuneError {
   152  				return i
   153  			}
   154  			i += n
   155  		}
   156  		return -1
   157  	case !utf8.ValidRune(r):
   158  		return -1
   159  	default:
   160  		var b [utf8.UTFMax]byte
   161  		n := utf8.EncodeRune(b[:], r)
   162  		return Index(s, b[:n])
   163  	}
   164  }
   165  
   166  // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
   167  // It returns the byte index of the first occurrence in s of any of the Unicode
   168  // code points in chars. It returns -1 if chars is empty or if there is no code
   169  // point in common.
   170  func IndexAny(s []byte, chars string) int {
   171  	if chars == "" {
   172  		// Avoid scanning all of s.
   173  		return -1
   174  	}
   175  	if len(s) > 8 {
   176  		if as, isASCII := makeASCIISet(chars); isASCII {
   177  			for i, c := range s {
   178  				if as.contains(c) {
   179  					return i
   180  				}
   181  			}
   182  			return -1
   183  		}
   184  	}
   185  	var width int
   186  	for i := 0; i < len(s); i += width {
   187  		r := rune(s[i])
   188  		if r < utf8.RuneSelf {
   189  			width = 1
   190  		} else {
   191  			r, width = utf8.DecodeRune(s[i:])
   192  		}
   193  		for _, ch := range chars {
   194  			if r == ch {
   195  				return i
   196  			}
   197  		}
   198  	}
   199  	return -1
   200  }
   201  
   202  // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
   203  // points. It returns the byte index of the last occurrence in s of any of
   204  // the Unicode code points in chars. It returns -1 if chars is empty or if
   205  // there is no code point in common.
   206  func LastIndexAny(s []byte, chars string) int {
   207  	if chars == "" {
   208  		// Avoid scanning all of s.
   209  		return -1
   210  	}
   211  	if len(s) > 8 {
   212  		if as, isASCII := makeASCIISet(chars); isASCII {
   213  			for i := len(s) - 1; i >= 0; i-- {
   214  				if as.contains(s[i]) {
   215  					return i
   216  				}
   217  			}
   218  			return -1
   219  		}
   220  	}
   221  	for i := len(s); i > 0; {
   222  		r, size := utf8.DecodeLastRune(s[:i])
   223  		i -= size
   224  		for _, c := range chars {
   225  			if r == c {
   226  				return i
   227  			}
   228  		}
   229  	}
   230  	return -1
   231  }
   232  
   233  // Generic split: splits after each instance of sep,
   234  // including sepSave bytes of sep in the subslices.
   235  func genSplit(s, sep []byte, sepSave, n int) [][]byte {
   236  	if n == 0 {
   237  		return nil
   238  	}
   239  	if len(sep) == 0 {
   240  		return explode(s, n)
   241  	}
   242  	if n < 0 {
   243  		n = Count(s, sep) + 1
   244  	}
   245  
   246  	a := make([][]byte, n)
   247  	n--
   248  	i := 0
   249  	for i < n {
   250  		m := Index(s, sep)
   251  		if m < 0 {
   252  			break
   253  		}
   254  		a[i] = s[: m+sepSave : m+sepSave]
   255  		s = s[m+len(sep):]
   256  		i++
   257  	}
   258  	a[i] = s
   259  	return a[:i+1]
   260  }
   261  
   262  // SplitN slices s into subslices separated by sep and returns a slice of
   263  // the subslices between those separators.
   264  // If sep is empty, SplitN splits after each UTF-8 sequence.
   265  // The count determines the number of subslices to return:
   266  //   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
   267  //   n == 0: the result is nil (zero subslices)
   268  //   n < 0: all subslices
   269  func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
   270  
   271  // SplitAfterN slices s into subslices after each instance of sep and
   272  // returns a slice of those subslices.
   273  // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
   274  // The count determines the number of subslices to return:
   275  //   n > 0: at most n subslices; the last subslice will be the unsplit remainder.
   276  //   n == 0: the result is nil (zero subslices)
   277  //   n < 0: all subslices
   278  func SplitAfterN(s, sep []byte, n int) [][]byte {
   279  	return genSplit(s, sep, len(sep), n)
   280  }
   281  
   282  // Split slices s into all subslices separated by sep and returns a slice of
   283  // the subslices between those separators.
   284  // If sep is empty, Split splits after each UTF-8 sequence.
   285  // It is equivalent to SplitN with a count of -1.
   286  func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
   287  
   288  // SplitAfter slices s into all subslices after each instance of sep and
   289  // returns a slice of those subslices.
   290  // If sep is empty, SplitAfter splits after each UTF-8 sequence.
   291  // It is equivalent to SplitAfterN with a count of -1.
   292  func SplitAfter(s, sep []byte) [][]byte {
   293  	return genSplit(s, sep, len(sep), -1)
   294  }
   295  
   296  var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
   297  
   298  // Fields interprets s as a sequence of UTF-8-encoded code points.
   299  // It splits the slice s around each instance of one or more consecutive white space
   300  // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
   301  // empty slice if s contains only white space.
   302  func Fields(s []byte) [][]byte {
   303  	// First count the fields.
   304  	// This is an exact count if s is ASCII, otherwise it is an approximation.
   305  	n := 0
   306  	wasSpace := 1
   307  	// setBits is used to track which bits are set in the bytes of s.
   308  	setBits := uint8(0)
   309  	for i := 0; i < len(s); i++ {
   310  		r := s[i]
   311  		setBits |= r
   312  		isSpace := int(asciiSpace[r])
   313  		n += wasSpace & ^isSpace
   314  		wasSpace = isSpace
   315  	}
   316  
   317  	if setBits >= utf8.RuneSelf {
   318  		// Some runes in the input slice are not ASCII.
   319  		return FieldsFunc(s, unicode.IsSpace)
   320  	}
   321  
   322  	// ASCII fast path
   323  	a := make([][]byte, n)
   324  	na := 0
   325  	fieldStart := 0
   326  	i := 0
   327  	// Skip spaces in the front of the input.
   328  	for i < len(s) && asciiSpace[s[i]] != 0 {
   329  		i++
   330  	}
   331  	fieldStart = i
   332  	for i < len(s) {
   333  		if asciiSpace[s[i]] == 0 {
   334  			i++
   335  			continue
   336  		}
   337  		a[na] = s[fieldStart:i:i]
   338  		na++
   339  		i++
   340  		// Skip spaces in between fields.
   341  		for i < len(s) && asciiSpace[s[i]] != 0 {
   342  			i++
   343  		}
   344  		fieldStart = i
   345  	}
   346  	if fieldStart < len(s) { // Last field might end at EOF.
   347  		a[na] = s[fieldStart:len(s):len(s)]
   348  	}
   349  	return a
   350  }
   351  
   352  // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
   353  // It splits the slice s at each run of code points c satisfying f(c) and
   354  // returns a slice of subslices of s. If all code points in s satisfy f(c), or
   355  // len(s) == 0, an empty slice is returned.
   356  // FieldsFunc makes no guarantees about the order in which it calls f(c).
   357  // If f does not return consistent results for a given c, FieldsFunc may crash.
   358  func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
   359  	// A span is used to record a slice of s of the form s[start:end].
   360  	// The start index is inclusive and the end index is exclusive.
   361  	type span struct {
   362  		start int
   363  		end   int
   364  	}
   365  	spans := make([]span, 0, 32)
   366  
   367  	// Find the field start and end indices.
   368  	wasField := false
   369  	fromIndex := 0
   370  	for i := 0; i < len(s); {
   371  		size := 1
   372  		r := rune(s[i])
   373  		if r >= utf8.RuneSelf {
   374  			r, size = utf8.DecodeRune(s[i:])
   375  		}
   376  		if f(r) {
   377  			if wasField {
   378  				spans = append(spans, span{start: fromIndex, end: i})
   379  				wasField = false
   380  			}
   381  		} else {
   382  			if !wasField {
   383  				fromIndex = i
   384  				wasField = true
   385  			}
   386  		}
   387  		i += size
   388  	}
   389  
   390  	// Last field might end at EOF.
   391  	if wasField {
   392  		spans = append(spans, span{fromIndex, len(s)})
   393  	}
   394  
   395  	// Create subslices from recorded field indices.
   396  	a := make([][]byte, len(spans))
   397  	for i, span := range spans {
   398  		a[i] = s[span.start:span.end:span.end]
   399  	}
   400  
   401  	return a
   402  }
   403  
   404  // Join concatenates the elements of s to create a new byte slice. The separator
   405  // sep is placed between elements in the resulting slice.
   406  func Join(s [][]byte, sep []byte) []byte {
   407  	if len(s) == 0 {
   408  		return []byte{}
   409  	}
   410  	if len(s) == 1 {
   411  		// Just return a copy.
   412  		return append([]byte(nil), s[0]...)
   413  	}
   414  	n := len(sep) * (len(s) - 1)
   415  	for _, v := range s {
   416  		n += len(v)
   417  	}
   418  
   419  	b := make([]byte, n)
   420  	bp := copy(b, s[0])
   421  	for _, v := range s[1:] {
   422  		bp += copy(b[bp:], sep)
   423  		bp += copy(b[bp:], v)
   424  	}
   425  	return b
   426  }
   427  
   428  // HasPrefix tests whether the byte slice s begins with prefix.
   429  func HasPrefix(s, prefix []byte) bool {
   430  	return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
   431  }
   432  
   433  // HasSuffix tests whether the byte slice s ends with suffix.
   434  func HasSuffix(s, suffix []byte) bool {
   435  	return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
   436  }
   437  
   438  // Map returns a copy of the byte slice s with all its characters modified
   439  // according to the mapping function. If mapping returns a negative value, the character is
   440  // dropped from the byte slice with no replacement. The characters in s and the
   441  // output are interpreted as UTF-8-encoded code points.
   442  func Map(mapping func(r rune) rune, s []byte) []byte {
   443  	// In the worst case, the slice can grow when mapped, making
   444  	// things unpleasant. But it's so rare we barge in assuming it's
   445  	// fine. It could also shrink but that falls out naturally.
   446  	maxbytes := len(s) // length of b
   447  	nbytes := 0        // number of bytes encoded in b
   448  	b := make([]byte, maxbytes)
   449  	for i := 0; i < len(s); {
   450  		wid := 1
   451  		r := rune(s[i])
   452  		if r >= utf8.RuneSelf {
   453  			r, wid = utf8.DecodeRune(s[i:])
   454  		}
   455  		r = mapping(r)
   456  		if r >= 0 {
   457  			rl := utf8.RuneLen(r)
   458  			if rl < 0 {
   459  				rl = len(string(utf8.RuneError))
   460  			}
   461  			if nbytes+rl > maxbytes {
   462  				// Grow the buffer.
   463  				maxbytes = maxbytes*2 + utf8.UTFMax
   464  				nb := make([]byte, maxbytes)
   465  				copy(nb, b[0:nbytes])
   466  				b = nb
   467  			}
   468  			nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
   469  		}
   470  		i += wid
   471  	}
   472  	return b[0:nbytes]
   473  }
   474  
   475  // Repeat returns a new byte slice consisting of count copies of b.
   476  //
   477  // It panics if count is negative or if
   478  // the result of (len(b) * count) overflows.
   479  func Repeat(b []byte, count int) []byte {
   480  	// Since we cannot return an error on overflow,
   481  	// we should panic if the repeat will generate
   482  	// an overflow.
   483  	// See Issue golang.org/issue/16237.
   484  	if count < 0 {
   485  		panic("bytes: negative Repeat count")
   486  	} else if count > 0 && len(b)*count/count != len(b) {
   487  		panic("bytes: Repeat count causes overflow")
   488  	}
   489  
   490  	nb := make([]byte, len(b)*count)
   491  	bp := copy(nb, b)
   492  	for bp < len(nb) {
   493  		copy(nb[bp:], nb[:bp])
   494  		bp *= 2
   495  	}
   496  	return nb
   497  }
   498  
   499  // ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
   500  func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
   501  
   502  // ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
   503  func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
   504  
   505  // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
   506  func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
   507  
   508  // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
   509  // upper case, giving priority to the special casing rules.
   510  func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
   511  	return Map(c.ToUpper, s)
   512  }
   513  
   514  // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
   515  // lower case, giving priority to the special casing rules.
   516  func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
   517  	return Map(c.ToLower, s)
   518  }
   519  
   520  // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
   521  // title case, giving priority to the special casing rules.
   522  func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
   523  	return Map(c.ToTitle, s)
   524  }
   525  
   526  // isSeparator reports whether the rune could mark a word boundary.
   527  // TODO: update when package unicode captures more of the properties.
   528  func isSeparator(r rune) bool {
   529  	// ASCII alphanumerics and underscore are not separators
   530  	if r <= 0x7F {
   531  		switch {
   532  		case '0' <= r && r <= '9':
   533  			return false
   534  		case 'a' <= r && r <= 'z':
   535  			return false
   536  		case 'A' <= r && r <= 'Z':
   537  			return false
   538  		case r == '_':
   539  			return false
   540  		}
   541  		return true
   542  	}
   543  	// Letters and digits are not separators
   544  	if unicode.IsLetter(r) || unicode.IsDigit(r) {
   545  		return false
   546  	}
   547  	// Otherwise, all we can do for now is treat spaces as separators.
   548  	return unicode.IsSpace(r)
   549  }
   550  
   551  // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
   552  // words mapped to their title case.
   553  //
   554  // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
   555  func Title(s []byte) []byte {
   556  	// Use a closure here to remember state.
   557  	// Hackish but effective. Depends on Map scanning in order and calling
   558  	// the closure once per rune.
   559  	prev := ' '
   560  	return Map(
   561  		func(r rune) rune {
   562  			if isSeparator(prev) {
   563  				prev = r
   564  				return unicode.ToTitle(r)
   565  			}
   566  			prev = r
   567  			return r
   568  		},
   569  		s)
   570  }
   571  
   572  // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
   573  // all leading UTF-8-encoded code points c that satisfy f(c).
   574  func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
   575  	i := indexFunc(s, f, false)
   576  	if i == -1 {
   577  		return nil
   578  	}
   579  	return s[i:]
   580  }
   581  
   582  // TrimRightFunc returns a subslice of s by slicing off all trailing
   583  // UTF-8-encoded code points c that satisfy f(c).
   584  func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
   585  	i := lastIndexFunc(s, f, false)
   586  	if i >= 0 && s[i] >= utf8.RuneSelf {
   587  		_, wid := utf8.DecodeRune(s[i:])
   588  		i += wid
   589  	} else {
   590  		i++
   591  	}
   592  	return s[0:i]
   593  }
   594  
   595  // TrimFunc returns a subslice of s by slicing off all leading and trailing
   596  // UTF-8-encoded code points c that satisfy f(c).
   597  func TrimFunc(s []byte, f func(r rune) bool) []byte {
   598  	return TrimRightFunc(TrimLeftFunc(s, f), f)
   599  }
   600  
   601  // TrimPrefix returns s without the provided leading prefix string.
   602  // If s doesn't start with prefix, s is returned unchanged.
   603  func TrimPrefix(s, prefix []byte) []byte {
   604  	if HasPrefix(s, prefix) {
   605  		return s[len(prefix):]
   606  	}
   607  	return s
   608  }
   609  
   610  // TrimSuffix returns s without the provided trailing suffix string.
   611  // If s doesn't end with suffix, s is returned unchanged.
   612  func TrimSuffix(s, suffix []byte) []byte {
   613  	if HasSuffix(s, suffix) {
   614  		return s[:len(s)-len(suffix)]
   615  	}
   616  	return s
   617  }
   618  
   619  // IndexFunc interprets s as a sequence of UTF-8-encoded code points.
   620  // It returns the byte index in s of the first Unicode
   621  // code point satisfying f(c), or -1 if none do.
   622  func IndexFunc(s []byte, f func(r rune) bool) int {
   623  	return indexFunc(s, f, true)
   624  }
   625  
   626  // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
   627  // It returns the byte index in s of the last Unicode
   628  // code point satisfying f(c), or -1 if none do.
   629  func LastIndexFunc(s []byte, f func(r rune) bool) int {
   630  	return lastIndexFunc(s, f, true)
   631  }
   632  
   633  // indexFunc is the same as IndexFunc except that if
   634  // truth==false, the sense of the predicate function is
   635  // inverted.
   636  func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
   637  	start := 0
   638  	for start < len(s) {
   639  		wid := 1
   640  		r := rune(s[start])
   641  		if r >= utf8.RuneSelf {
   642  			r, wid = utf8.DecodeRune(s[start:])
   643  		}
   644  		if f(r) == truth {
   645  			return start
   646  		}
   647  		start += wid
   648  	}
   649  	return -1
   650  }
   651  
   652  // lastIndexFunc is the same as LastIndexFunc except that if
   653  // truth==false, the sense of the predicate function is
   654  // inverted.
   655  func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
   656  	for i := len(s); i > 0; {
   657  		r, size := rune(s[i-1]), 1
   658  		if r >= utf8.RuneSelf {
   659  			r, size = utf8.DecodeLastRune(s[0:i])
   660  		}
   661  		i -= size
   662  		if f(r) == truth {
   663  			return i
   664  		}
   665  	}
   666  	return -1
   667  }
   668  
   669  // asciiSet is a 32-byte value, where each bit represents the presence of a
   670  // given ASCII character in the set. The 128-bits of the lower 16 bytes,
   671  // starting with the least-significant bit of the lowest word to the
   672  // most-significant bit of the highest word, map to the full range of all
   673  // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
   674  // ensuring that any non-ASCII character will be reported as not in the set.
   675  type asciiSet [8]uint32
   676  
   677  // makeASCIISet creates a set of ASCII characters and reports whether all
   678  // characters in chars are ASCII.
   679  func makeASCIISet(chars string) (as asciiSet, ok bool) {
   680  	for i := 0; i < len(chars); i++ {
   681  		c := chars[i]
   682  		if c >= utf8.RuneSelf {
   683  			return as, false
   684  		}
   685  		as[c>>5] |= 1 << uint(c&31)
   686  	}
   687  	return as, true
   688  }
   689  
   690  // contains reports whether c is inside the set.
   691  func (as *asciiSet) contains(c byte) bool {
   692  	return (as[c>>5] & (1 << uint(c&31))) != 0
   693  }
   694  
   695  func makeCutsetFunc(cutset string) func(r rune) bool {
   696  	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
   697  		return func(r rune) bool {
   698  			return r == rune(cutset[0])
   699  		}
   700  	}
   701  	if as, isASCII := makeASCIISet(cutset); isASCII {
   702  		return func(r rune) bool {
   703  			return r < utf8.RuneSelf && as.contains(byte(r))
   704  		}
   705  	}
   706  	return func(r rune) bool {
   707  		for _, c := range cutset {
   708  			if c == r {
   709  				return true
   710  			}
   711  		}
   712  		return false
   713  	}
   714  }
   715  
   716  // Trim returns a subslice of s by slicing off all leading and
   717  // trailing UTF-8-encoded code points contained in cutset.
   718  func Trim(s []byte, cutset string) []byte {
   719  	return TrimFunc(s, makeCutsetFunc(cutset))
   720  }
   721  
   722  // TrimLeft returns a subslice of s by slicing off all leading
   723  // UTF-8-encoded code points contained in cutset.
   724  func TrimLeft(s []byte, cutset string) []byte {
   725  	return TrimLeftFunc(s, makeCutsetFunc(cutset))
   726  }
   727  
   728  // TrimRight returns a subslice of s by slicing off all trailing
   729  // UTF-8-encoded code points that are contained in cutset.
   730  func TrimRight(s []byte, cutset string) []byte {
   731  	return TrimRightFunc(s, makeCutsetFunc(cutset))
   732  }
   733  
   734  // TrimSpace returns a subslice of s by slicing off all leading and
   735  // trailing white space, as defined by Unicode.
   736  func TrimSpace(s []byte) []byte {
   737  	return TrimFunc(s, unicode.IsSpace)
   738  }
   739  
   740  // Runes interprets s as a sequence of UTF-8-encoded code points.
   741  // It returns a slice of runes (Unicode code points) equivalent to s.
   742  func Runes(s []byte) []rune {
   743  	t := make([]rune, utf8.RuneCount(s))
   744  	i := 0
   745  	for len(s) > 0 {
   746  		r, l := utf8.DecodeRune(s)
   747  		t[i] = r
   748  		i++
   749  		s = s[l:]
   750  	}
   751  	return t
   752  }
   753  
   754  // Replace returns a copy of the slice s with the first n
   755  // non-overlapping instances of old replaced by new.
   756  // If old is empty, it matches at the beginning of the slice
   757  // and after each UTF-8 sequence, yielding up to k+1 replacements
   758  // for a k-rune slice.
   759  // If n < 0, there is no limit on the number of replacements.
   760  func Replace(s, old, new []byte, n int) []byte {
   761  	m := 0
   762  	if n != 0 {
   763  		// Compute number of replacements.
   764  		m = Count(s, old)
   765  	}
   766  	if m == 0 {
   767  		// Just return a copy.
   768  		return append([]byte(nil), s...)
   769  	}
   770  	if n < 0 || m < n {
   771  		n = m
   772  	}
   773  
   774  	// Apply replacements to buffer.
   775  	t := make([]byte, len(s)+n*(len(new)-len(old)))
   776  	w := 0
   777  	start := 0
   778  	for i := 0; i < n; i++ {
   779  		j := start
   780  		if len(old) == 0 {
   781  			if i > 0 {
   782  				_, wid := utf8.DecodeRune(s[start:])
   783  				j += wid
   784  			}
   785  		} else {
   786  			j += Index(s[start:], old)
   787  		}
   788  		w += copy(t[w:], s[start:j])
   789  		w += copy(t[w:], new)
   790  		start = j + len(old)
   791  	}
   792  	w += copy(t[w:], s[start:])
   793  	return t[0:w]
   794  }
   795  
   796  // ReplaceAll returns a copy of the slice s with all
   797  // non-overlapping instances of old replaced by new.
   798  // If old is empty, it matches at the beginning of the slice
   799  // and after each UTF-8 sequence, yielding up to k+1 replacements
   800  // for a k-rune slice.
   801  func ReplaceAll(s, old, new []byte) []byte {
   802  	return Replace(s, old, new, -1)
   803  }
   804  
   805  // EqualFold reports whether s and t, interpreted as UTF-8 strings,
   806  // are equal under Unicode case-folding.
   807  func EqualFold(s, t []byte) bool {
   808  	for len(s) != 0 && len(t) != 0 {
   809  		// Extract first rune from each.
   810  		var sr, tr rune
   811  		if s[0] < utf8.RuneSelf {
   812  			sr, s = rune(s[0]), s[1:]
   813  		} else {
   814  			r, size := utf8.DecodeRune(s)
   815  			sr, s = r, s[size:]
   816  		}
   817  		if t[0] < utf8.RuneSelf {
   818  			tr, t = rune(t[0]), t[1:]
   819  		} else {
   820  			r, size := utf8.DecodeRune(t)
   821  			tr, t = r, t[size:]
   822  		}
   823  
   824  		// If they match, keep going; if not, return false.
   825  
   826  		// Easy case.
   827  		if tr == sr {
   828  			continue
   829  		}
   830  
   831  		// Make sr < tr to simplify what follows.
   832  		if tr < sr {
   833  			tr, sr = sr, tr
   834  		}
   835  		// Fast check for ASCII.
   836  		if tr < utf8.RuneSelf {
   837  			// ASCII only, sr/tr must be upper/lower case
   838  			if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
   839  				continue
   840  			}
   841  			return false
   842  		}
   843  
   844  		// General case. SimpleFold(x) returns the next equivalent rune > x
   845  		// or wraps around to smaller values.
   846  		r := unicode.SimpleFold(sr)
   847  		for r != sr && r < tr {
   848  			r = unicode.SimpleFold(r)
   849  		}
   850  		if r == tr {
   851  			continue
   852  		}
   853  		return false
   854  	}
   855  
   856  	// One string is empty. Are both?
   857  	return len(s) == len(t)
   858  }
   859  
   860  // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
   861  func Index(s, sep []byte) int {
   862  	n := len(sep)
   863  	switch {
   864  	case n == 0:
   865  		return 0
   866  	case n == 1:
   867  		return IndexByte(s, sep[0])
   868  	case n == len(s):
   869  		if Equal(sep, s) {
   870  			return 0
   871  		}
   872  		return -1
   873  	case n > len(s):
   874  		return -1
   875  	case n <= bytealg.MaxLen:
   876  		// Use brute force when s and sep both are small
   877  		if len(s) <= bytealg.MaxBruteForce {
   878  			return bytealg.Index(s, sep)
   879  		}
   880  		c0 := sep[0]
   881  		c1 := sep[1]
   882  		i := 0
   883  		t := len(s) - n + 1
   884  		fails := 0
   885  		for i < t {
   886  			if s[i] != c0 {
   887  				// IndexByte is faster than bytealg.Index, so use it as long as
   888  				// we're not getting lots of false positives.
   889  				o := IndexByte(s[i:t], c0)
   890  				if o < 0 {
   891  					return -1
   892  				}
   893  				i += o
   894  			}
   895  			if s[i+1] == c1 && Equal(s[i:i+n], sep) {
   896  				return i
   897  			}
   898  			fails++
   899  			i++
   900  			// Switch to bytealg.Index when IndexByte produces too many false positives.
   901  			if fails > bytealg.Cutover(i) {
   902  				r := bytealg.Index(s[i:], sep)
   903  				if r >= 0 {
   904  					return r + i
   905  				}
   906  				return -1
   907  			}
   908  		}
   909  		return -1
   910  	}
   911  	c0 := sep[0]
   912  	c1 := sep[1]
   913  	i := 0
   914  	fails := 0
   915  	t := len(s) - n + 1
   916  	for i < t {
   917  		if s[i] != c0 {
   918  			o := IndexByte(s[i:t], c0)
   919  			if o < 0 {
   920  				break
   921  			}
   922  			i += o
   923  		}
   924  		if s[i+1] == c1 && Equal(s[i:i+n], sep) {
   925  			return i
   926  		}
   927  		i++
   928  		fails++
   929  		if fails >= 4+i>>4 && i < t {
   930  			// Give up on IndexByte, it isn't skipping ahead
   931  			// far enough to be better than Rabin-Karp.
   932  			// Experiments (using IndexPeriodic) suggest
   933  			// the cutover is about 16 byte skips.
   934  			// TODO: if large prefixes of sep are matching
   935  			// we should cutover at even larger average skips,
   936  			// because Equal becomes that much more expensive.
   937  			// This code does not take that effect into account.
   938  			j := indexRabinKarp(s[i:], sep)
   939  			if j < 0 {
   940  				return -1
   941  			}
   942  			return i + j
   943  		}
   944  	}
   945  	return -1
   946  }
   947  
   948  func indexRabinKarp(s, sep []byte) int {
   949  	// Rabin-Karp search
   950  	hashsep, pow := hashStr(sep)
   951  	n := len(sep)
   952  	var h uint32
   953  	for i := 0; i < n; i++ {
   954  		h = h*primeRK + uint32(s[i])
   955  	}
   956  	if h == hashsep && Equal(s[:n], sep) {
   957  		return 0
   958  	}
   959  	for i := n; i < len(s); {
   960  		h *= primeRK
   961  		h += uint32(s[i])
   962  		h -= pow * uint32(s[i-n])
   963  		i++
   964  		if h == hashsep && Equal(s[i-n:i], sep) {
   965  			return i - n
   966  		}
   967  	}
   968  	return -1
   969  }
   970  
   971  // primeRK is the prime base used in Rabin-Karp algorithm.
   972  const primeRK = 16777619
   973  
   974  // hashStr returns the hash and the appropriate multiplicative
   975  // factor for use in Rabin-Karp algorithm.
   976  func hashStr(sep []byte) (uint32, uint32) {
   977  	hash := uint32(0)
   978  	for i := 0; i < len(sep); i++ {
   979  		hash = hash*primeRK + uint32(sep[i])
   980  	}
   981  	var pow, sq uint32 = 1, primeRK
   982  	for i := len(sep); i > 0; i >>= 1 {
   983  		if i&1 != 0 {
   984  			pow *= sq
   985  		}
   986  		sq *= sq
   987  	}
   988  	return hash, pow
   989  }
   990  

View as plain text