The Go Programming Language

Source file src/pkg/strings/strings.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package strings implements simple functions to manipulate strings.
     6	package strings
     7	
     8	import (
     9		"unicode"
    10		"utf8"
    11	)
    12	
    13	// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n < 0 means no limit).
    14	// Invalid UTF-8 sequences become correct encodings of U+FFF8.
    15	func explode(s string, n int) []string {
    16		if n == 0 {
    17			return nil
    18		}
    19		l := utf8.RuneCountInString(s)
    20		if n <= 0 || n > l {
    21			n = l
    22		}
    23		a := make([]string, n)
    24		var size, rune int
    25		i, cur := 0, 0
    26		for ; i+1 < n; i++ {
    27			rune, size = utf8.DecodeRuneInString(s[cur:])
    28			a[i] = string(rune)
    29			cur += size
    30		}
    31		// add the rest, if there is any
    32		if cur < len(s) {
    33			a[i] = s[cur:]
    34		}
    35		return a
    36	}
    37	
    38	// Count counts the number of non-overlapping instances of sep in s.
    39	func Count(s, sep string) int {
    40		if sep == "" {
    41			return utf8.RuneCountInString(s) + 1
    42		}
    43		c := sep[0]
    44		l := len(sep)
    45		n := 0
    46		if l == 1 {
    47			// special case worth making fast
    48			for i := 0; i < len(s); i++ {
    49				if s[i] == c {
    50					n++
    51				}
    52			}
    53			return n
    54		}
    55		for i := 0; i+l <= len(s); i++ {
    56			if s[i] == c && s[i:i+l] == sep {
    57				n++
    58				i += l - 1
    59			}
    60		}
    61		return n
    62	}
    63	
    64	// Contains returns true if substr is within s.
    65	func Contains(s, substr string) bool {
    66		return Index(s, substr) != -1
    67	}
    68	
    69	// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
    70	func Index(s, sep string) int {
    71		n := len(sep)
    72		if n == 0 {
    73			return 0
    74		}
    75		c := sep[0]
    76		if n == 1 {
    77			// special case worth making fast
    78			for i := 0; i < len(s); i++ {
    79				if s[i] == c {
    80					return i
    81				}
    82			}
    83			return -1
    84		}
    85		// n > 1
    86		for i := 0; i+n <= len(s); i++ {
    87			if s[i] == c && s[i:i+n] == sep {
    88				return i
    89			}
    90		}
    91		return -1
    92	}
    93	
    94	// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
    95	func LastIndex(s, sep string) int {
    96		n := len(sep)
    97		if n == 0 {
    98			return len(s)
    99		}
   100		c := sep[0]
   101		if n == 1 {
   102			// special case worth making fast
   103			for i := len(s) - 1; i >= 0; i-- {
   104				if s[i] == c {
   105					return i
   106				}
   107			}
   108			return -1
   109		}
   110		// n > 1
   111		for i := len(s) - n; i >= 0; i-- {
   112			if s[i] == c && s[i:i+n] == sep {
   113				return i
   114			}
   115		}
   116		return -1
   117	}
   118	
   119	// IndexRune returns the index of the first instance of the Unicode code point
   120	// rune, or -1 if rune is not present in s.
   121	func IndexRune(s string, rune int) int {
   122		switch {
   123		case rune < 0x80:
   124			b := byte(rune)
   125			for i := 0; i < len(s); i++ {
   126				if s[i] == b {
   127					return i
   128				}
   129			}
   130		default:
   131			for i, c := range s {
   132				if c == rune {
   133					return i
   134				}
   135			}
   136		}
   137		return -1
   138	}
   139	
   140	// IndexAny returns the index of the first instance of any Unicode code point
   141	// from chars in s, or -1 if no Unicode code point from chars is present in s.
   142	func IndexAny(s, chars string) int {
   143		if len(chars) > 0 {
   144			for i, c := range s {
   145				for _, m := range chars {
   146					if c == m {
   147						return i
   148					}
   149				}
   150			}
   151		}
   152		return -1
   153	}
   154	
   155	// LastIndexAny returns the index of the last instance of any Unicode code
   156	// point from chars in s, or -1 if no Unicode code point from chars is
   157	// present in s.
   158	func LastIndexAny(s, chars string) int {
   159		if len(chars) > 0 {
   160			for i := len(s); i > 0; {
   161				rune, size := utf8.DecodeLastRuneInString(s[0:i])
   162				i -= size
   163				for _, m := range chars {
   164					if rune == m {
   165						return i
   166					}
   167				}
   168			}
   169		}
   170		return -1
   171	}
   172	
   173	// Generic split: splits after each instance of sep,
   174	// including sepSave bytes of sep in the subarrays.
   175	func genSplit(s, sep string, sepSave, n int) []string {
   176		if n == 0 {
   177			return nil
   178		}
   179		if sep == "" {
   180			return explode(s, n)
   181		}
   182		if n < 0 {
   183			n = Count(s, sep) + 1
   184		}
   185		c := sep[0]
   186		start := 0
   187		a := make([]string, n)
   188		na := 0
   189		for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
   190			if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
   191				a[na] = s[start : i+sepSave]
   192				na++
   193				start = i + len(sep)
   194				i += len(sep) - 1
   195			}
   196		}
   197		a[na] = s[start:]
   198		return a[0 : na+1]
   199	}
   200	
   201	// SplitN slices s into substrings separated by sep and returns a slice of
   202	// the substrings between those separators.
   203	// If sep is empty, SplitN splits after each UTF-8 sequence.
   204	// The count determines the number of substrings to return:
   205	//   n > 0: at most n substrings; the last substring will be the unsplit remainder.
   206	//   n == 0: the result is nil (zero substrings)
   207	//   n < 0: all substrings
   208	func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
   209	
   210	// SplitAfterN slices s into substrings after each instance of sep and
   211	// returns a slice of those substrings.
   212	// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
   213	// The count determines the number of substrings to return:
   214	//   n > 0: at most n substrings; the last substring will be the unsplit remainder.
   215	//   n == 0: the result is nil (zero substrings)
   216	//   n < 0: all substrings
   217	func SplitAfterN(s, sep string, n int) []string {
   218		return genSplit(s, sep, len(sep), n)
   219	}
   220	
   221	// Split slices s into all substrings separated by sep and returns a slice of
   222	// the substrings between those separators.
   223	// If sep is empty, Split splits after each UTF-8 sequence.
   224	// It is equivalent to SplitN with a count of -1.
   225	func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
   226	
   227	// SplitAfter slices s into all substrings after each instance of sep and
   228	// returns a slice of those substrings.
   229	// If sep is empty, SplitAfter splits after each UTF-8 sequence.
   230	// It is equivalent to SplitAfterN with a count of -1.
   231	func SplitAfter(s, sep string) []string {
   232		return genSplit(s, sep, len(sep), -1)
   233	}
   234	
   235	// Fields splits the string s around each instance of one or more consecutive white space
   236	// characters, returning an array of substrings of s or an empty list if s contains only white space.
   237	func Fields(s string) []string {
   238		return FieldsFunc(s, unicode.IsSpace)
   239	}
   240	
   241	// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
   242	// and returns an array of slices of s. If all code points in s satisfy f(c) or the
   243	// string is empty, an empty slice is returned.
   244	func FieldsFunc(s string, f func(int) bool) []string {
   245		// First count the fields.
   246		n := 0
   247		inField := false
   248		for _, rune := range s {
   249			wasInField := inField
   250			inField = !f(rune)
   251			if inField && !wasInField {
   252				n++
   253			}
   254		}
   255	
   256		// Now create them.
   257		a := make([]string, n)
   258		na := 0
   259		fieldStart := -1 // Set to -1 when looking for start of field.
   260		for i, rune := range s {
   261			if f(rune) {
   262				if fieldStart >= 0 {
   263					a[na] = s[fieldStart:i]
   264					na++
   265					fieldStart = -1
   266				}
   267			} else if fieldStart == -1 {
   268				fieldStart = i
   269			}
   270		}
   271		if fieldStart != -1 { // Last field might end at EOF.
   272			a[na] = s[fieldStart:]
   273		}
   274		return a
   275	}
   276	
   277	// Join concatenates the elements of a to create a single string.   The separator string
   278	// sep is placed between elements in the resulting string.
   279	func Join(a []string, sep string) string {
   280		if len(a) == 0 {
   281			return ""
   282		}
   283		if len(a) == 1 {
   284			return a[0]
   285		}
   286		n := len(sep) * (len(a) - 1)
   287		for i := 0; i < len(a); i++ {
   288			n += len(a[i])
   289		}
   290	
   291		b := make([]byte, n)
   292		bp := copy(b, a[0])
   293		for _, s := range a[1:] {
   294			bp += copy(b[bp:], sep)
   295			bp += copy(b[bp:], s)
   296		}
   297		return string(b)
   298	}
   299	
   300	// HasPrefix tests whether the string s begins with prefix.
   301	func HasPrefix(s, prefix string) bool {
   302		return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
   303	}
   304	
   305	// HasSuffix tests whether the string s ends with suffix.
   306	func HasSuffix(s, suffix string) bool {
   307		return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
   308	}
   309	
   310	// Map returns a copy of the string s with all its characters modified
   311	// according to the mapping function. If mapping returns a negative value, the character is
   312	// dropped from the string with no replacement.
   313	func Map(mapping func(rune int) int, s string) string {
   314		// In the worst case, the string can grow when mapped, making
   315		// things unpleasant.  But it's so rare we barge in assuming it's
   316		// fine.  It could also shrink but that falls out naturally.
   317		maxbytes := len(s) // length of b
   318		nbytes := 0        // number of bytes encoded in b
   319		// The output buffer b is initialized on demand, the first
   320		// time a character differs.
   321		var b []byte
   322	
   323		for i, c := range s {
   324			rune := mapping(c)
   325			if b == nil {
   326				if rune == c {
   327					continue
   328				}
   329				b = make([]byte, maxbytes)
   330				nbytes = copy(b, s[:i])
   331			}
   332			if rune >= 0 {
   333				wid := 1
   334				if rune >= utf8.RuneSelf {
   335					wid = utf8.RuneLen(rune)
   336				}
   337				if nbytes+wid > maxbytes {
   338					// Grow the buffer.
   339					maxbytes = maxbytes*2 + utf8.UTFMax
   340					nb := make([]byte, maxbytes)
   341					copy(nb, b[0:nbytes])
   342					b = nb
   343				}
   344				nbytes += utf8.EncodeRune(b[nbytes:maxbytes], rune)
   345			}
   346		}
   347		if b == nil {
   348			return s
   349		}
   350		return string(b[0:nbytes])
   351	}
   352	
   353	// Repeat returns a new string consisting of count copies of the string s.
   354	func Repeat(s string, count int) string {
   355		b := make([]byte, len(s)*count)
   356		bp := 0
   357		for i := 0; i < count; i++ {
   358			for j := 0; j < len(s); j++ {
   359				b[bp] = s[j]
   360				bp++
   361			}
   362		}
   363		return string(b)
   364	}
   365	
   366	// ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
   367	func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
   368	
   369	// ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
   370	func ToLower(s string) string { return Map(unicode.ToLower, s) }
   371	
   372	// ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
   373	func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
   374	
   375	// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
   376	// upper case, giving priority to the special casing rules.
   377	func ToUpperSpecial(_case unicode.SpecialCase, s string) string {
   378		return Map(func(r int) int { return _case.ToUpper(r) }, s)
   379	}
   380	
   381	// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
   382	// lower case, giving priority to the special casing rules.
   383	func ToLowerSpecial(_case unicode.SpecialCase, s string) string {
   384		return Map(func(r int) int { return _case.ToLower(r) }, s)
   385	}
   386	
   387	// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
   388	// title case, giving priority to the special casing rules.
   389	func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
   390		return Map(func(r int) int { return _case.ToTitle(r) }, s)
   391	}
   392	
   393	// isSeparator reports whether the rune could mark a word boundary.
   394	// TODO: update when package unicode captures more of the properties.
   395	func isSeparator(rune int) bool {
   396		// ASCII alphanumerics and underscore are not separators
   397		if rune <= 0x7F {
   398			switch {
   399			case '0' <= rune && rune <= '9':
   400				return false
   401			case 'a' <= rune && rune <= 'z':
   402				return false
   403			case 'A' <= rune && rune <= 'Z':
   404				return false
   405			case rune == '_':
   406				return false
   407			}
   408			return true
   409		}
   410		// Letters and digits are not separators
   411		if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
   412			return false
   413		}
   414		// Otherwise, all we can do for now is treat spaces as separators.
   415		return unicode.IsSpace(rune)
   416	}
   417	
   418	// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
   419	
   420	// Title returns a copy of the string s with all Unicode letters that begin words
   421	// mapped to their title case.
   422	func Title(s string) string {
   423		// Use a closure here to remember state.
   424		// Hackish but effective. Depends on Map scanning in order and calling
   425		// the closure once per rune.
   426		prev := ' '
   427		return Map(
   428			func(r int) int {
   429				if isSeparator(prev) {
   430					prev = r
   431					return unicode.ToTitle(r)
   432				}
   433				prev = r
   434				return r
   435			},
   436			s)
   437	}
   438	
   439	// TrimLeftFunc returns a slice of the string s with all leading
   440	// Unicode code points c satisfying f(c) removed.
   441	func TrimLeftFunc(s string, f func(r int) bool) string {
   442		i := indexFunc(s, f, false)
   443		if i == -1 {
   444			return ""
   445		}
   446		return s[i:]
   447	}
   448	
   449	// TrimRightFunc returns a slice of the string s with all trailing
   450	// Unicode code points c satisfying f(c) removed.
   451	func TrimRightFunc(s string, f func(r int) bool) string {
   452		i := lastIndexFunc(s, f, false)
   453		if i >= 0 && s[i] >= utf8.RuneSelf {
   454			_, wid := utf8.DecodeRuneInString(s[i:])
   455			i += wid
   456		} else {
   457			i++
   458		}
   459		return s[0:i]
   460	}
   461	
   462	// TrimFunc returns a slice of the string s with all leading
   463	// and trailing Unicode code points c satisfying f(c) removed.
   464	func TrimFunc(s string, f func(r int) bool) string {
   465		return TrimRightFunc(TrimLeftFunc(s, f), f)
   466	}
   467	
   468	// IndexFunc returns the index into s of the first Unicode
   469	// code point satisfying f(c), or -1 if none do.
   470	func IndexFunc(s string, f func(r int) bool) int {
   471		return indexFunc(s, f, true)
   472	}
   473	
   474	// LastIndexFunc returns the index into s of the last
   475	// Unicode code point satisfying f(c), or -1 if none do.
   476	func LastIndexFunc(s string, f func(r int) bool) int {
   477		return lastIndexFunc(s, f, true)
   478	}
   479	
   480	// indexFunc is the same as IndexFunc except that if
   481	// truth==false, the sense of the predicate function is
   482	// inverted.
   483	func indexFunc(s string, f func(r int) bool, truth bool) int {
   484		start := 0
   485		for start < len(s) {
   486			wid := 1
   487			rune := int(s[start])
   488			if rune >= utf8.RuneSelf {
   489				rune, wid = utf8.DecodeRuneInString(s[start:])
   490			}
   491			if f(rune) == truth {
   492				return start
   493			}
   494			start += wid
   495		}
   496		return -1
   497	}
   498	
   499	// lastIndexFunc is the same as LastIndexFunc except that if
   500	// truth==false, the sense of the predicate function is
   501	// inverted.
   502	func lastIndexFunc(s string, f func(r int) bool, truth bool) int {
   503		for i := len(s); i > 0; {
   504			rune, size := utf8.DecodeLastRuneInString(s[0:i])
   505			i -= size
   506			if f(rune) == truth {
   507				return i
   508			}
   509		}
   510		return -1
   511	}
   512	
   513	func makeCutsetFunc(cutset string) func(rune int) bool {
   514		return func(rune int) bool { return IndexRune(cutset, rune) != -1 }
   515	}
   516	
   517	// Trim returns a slice of the string s with all leading and
   518	// trailing Unicode code points contained in cutset removed.
   519	func Trim(s string, cutset string) string {
   520		if s == "" || cutset == "" {
   521			return s
   522		}
   523		return TrimFunc(s, makeCutsetFunc(cutset))
   524	}
   525	
   526	// TrimLeft returns a slice of the string s with all leading
   527	// Unicode code points contained in cutset removed.
   528	func TrimLeft(s string, cutset string) string {
   529		if s == "" || cutset == "" {
   530			return s
   531		}
   532		return TrimLeftFunc(s, makeCutsetFunc(cutset))
   533	}
   534	
   535	// TrimRight returns a slice of the string s, with all trailing
   536	// Unicode code points contained in cutset removed.
   537	func TrimRight(s string, cutset string) string {
   538		if s == "" || cutset == "" {
   539			return s
   540		}
   541		return TrimRightFunc(s, makeCutsetFunc(cutset))
   542	}
   543	
   544	// TrimSpace returns a slice of the string s, with all leading
   545	// and trailing white space removed, as defined by Unicode.
   546	func TrimSpace(s string) string {
   547		return TrimFunc(s, unicode.IsSpace)
   548	}
   549	
   550	// Replace returns a copy of the string s with the first n
   551	// non-overlapping instances of old replaced by new.
   552	// If n < 0, there is no limit on the number of replacements.
   553	func Replace(s, old, new string, n int) string {
   554		if old == new || n == 0 {
   555			return s // avoid allocation
   556		}
   557	
   558		// Compute number of replacements.
   559		if m := Count(s, old); m == 0 {
   560			return s // avoid allocation
   561		} else if n < 0 || m < n {
   562			n = m
   563		}
   564	
   565		// Apply replacements to buffer.
   566		t := make([]byte, len(s)+n*(len(new)-len(old)))
   567		w := 0
   568		start := 0
   569		for i := 0; i < n; i++ {
   570			j := start
   571			if len(old) == 0 {
   572				if i > 0 {
   573					_, wid := utf8.DecodeRuneInString(s[start:])
   574					j += wid
   575				}
   576			} else {
   577				j += Index(s[start:], old)
   578			}
   579			w += copy(t[w:], s[start:j])
   580			w += copy(t[w:], new)
   581			start = j + len(old)
   582		}
   583		w += copy(t[w:], s[start:])
   584		return string(t[0:w])
   585	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.