...
Run Format

Source file src/strings/replace.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package strings
     6	
     7	import "io"
     8	
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
type Replacer struct {
	// r is the concrete replacement algorithm; the Replace and WriteString
	// methods of Replacer simply forward to it.
	r replacer
}
    14	
// replacer is the interface that a replacement algorithm needs to implement.
type replacer interface {
	// Replace returns s with the algorithm's replacements applied.
	Replace(s string) string
	// WriteString writes s to w with the algorithm's replacements applied.
	// n is the number of bytes written to w and err is the first write
	// error encountered, if any.
	WriteString(w io.Writer, s string) (n int, err error)
}
    20	
    21	// NewReplacer returns a new Replacer from a list of old, new string pairs.
    22	// Replacements are performed in order, without overlapping matches.
    23	func NewReplacer(oldnew ...string) *Replacer {
    24		if len(oldnew)%2 == 1 {
    25			panic("strings.NewReplacer: odd argument count")
    26		}
    27	
    28		if len(oldnew) == 2 && len(oldnew[0]) > 1 {
    29			return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
    30		}
    31	
    32		allNewBytes := true
    33		for i := 0; i < len(oldnew); i += 2 {
    34			if len(oldnew[i]) != 1 {
    35				return &Replacer{r: makeGenericReplacer(oldnew)}
    36			}
    37			if len(oldnew[i+1]) != 1 {
    38				allNewBytes = false
    39			}
    40		}
    41	
    42		if allNewBytes {
    43			r := byteReplacer{}
    44			for i := range r {
    45				r[i] = byte(i)
    46			}
    47			// The first occurrence of old->new map takes precedence
    48			// over the others with the same old string.
    49			for i := len(oldnew) - 2; i >= 0; i -= 2 {
    50				o := oldnew[i][0]
    51				n := oldnew[i+1][0]
    52				r[o] = n
    53			}
    54			return &Replacer{r: &r}
    55		}
    56	
    57		r := byteStringReplacer{}
    58		// The first occurrence of old->new map takes precedence
    59		// over the others with the same old string.
    60		for i := len(oldnew) - 2; i >= 0; i -= 2 {
    61			o := oldnew[i][0]
    62			n := oldnew[i+1]
    63			r[o] = []byte(n)
    64		}
    65		return &Replacer{r: &r}
    66	}
    67	
// Replace returns a copy of s with all replacements performed.
// The work is forwarded to the replacement algorithm stored in r.
func (r *Replacer) Replace(s string) string {
	return r.r.Replace(s)
}
    72	
// WriteString writes s to w with all replacements performed.
// The work is forwarded to the replacement algorithm stored in r, and its
// results n and err are returned unchanged.
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
	return r.r.WriteString(w, s)
}
    77	
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//  n0  -
//  n1  a-
//  n2  .x+
//  n3  .y+
//  n4  b-
//  n5  .cbc+
//  n6  x+
//  n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key. Because a complete key may also map
	// to an empty value, priority (not value) is what marks a node as a
	// complete key.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'.
	table []*trieNode
}
   129	
// add inserts a key/value pair with the given priority into the sub-trie
// rooted at t. key is the part of the full key that has not yet been
// consumed on the way down to t; an empty key means t itself is the node for
// the pair. r supplies the byte-to-index mapping and the table size used
// whenever a lookup table has to be created or indexed.
//
// If the node for the key already has a non-zero priority, the existing
// value and priority are kept and the new pair is dropped.
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
	if key == "" {
		// t is the node for this key. Only claim it if no earlier pair has.
		if t.priority == 0 {
			t.value = val
			t.priority = priority
		}
		return
	}

	if t.prefix != "" {
		// Need to split the prefix among multiple nodes.
		var n int // length of the longest common prefix
		for ; n < len(t.prefix) && n < len(key); n++ {
			if t.prefix[n] != key[n] {
				break
			}
		}
		if n == len(t.prefix) {
			// The whole prefix matches: continue below the existing child.
			t.next.add(key[n:], val, priority, r)
		} else if n == 0 {
			// First byte differs, start a new lookup table here. Looking up
			// what is currently t.prefix[0] will lead to prefixNode, and
			// looking up key[0] will lead to keyNode.
			var prefixNode *trieNode
			if len(t.prefix) == 1 {
				prefixNode = t.next
			} else {
				prefixNode = &trieNode{
					prefix: t.prefix[1:],
					next:   t.next,
				}
			}
			keyNode := new(trieNode)
			t.table = make([]*trieNode, r.tableSize)
			t.table[r.mapping[t.prefix[0]]] = prefixNode
			t.table[r.mapping[key[0]]] = keyNode
			// t now branches through its table, so it no longer has a
			// single prefix child.
			t.prefix = ""
			t.next = nil
			keyNode.add(key[1:], val, priority, r)
		} else {
			// Insert new node after the common section of the prefix.
			next := &trieNode{
				prefix: t.prefix[n:],
				next:   t.next,
			}
			t.prefix = t.prefix[:n]
			t.next = next
			next.add(key[n:], val, priority, r)
		}
	} else if t.table != nil {
		// Insert into existing table.
		m := r.mapping[key[0]]
		if t.table[m] == nil {
			t.table[m] = new(trieNode)
		}
		t.table[m].add(key[1:], val, priority, r)
	} else {
		// t has no children yet: hang the whole remaining key off t as a
		// prefix leading to a fresh node that holds the pair.
		t.prefix = key
		t.next = new(trieNode)
		t.next.add("", val, priority, r)
	}
}
   192	
   193	func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
   194		// Iterate down the trie to the end, and grab the value and keylen with
   195		// the highest priority.
   196		bestPriority := 0
   197		node := &r.root
   198		n := 0
   199		for node != nil {
   200			if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
   201				bestPriority = node.priority
   202				val = node.value
   203				keylen = n
   204				found = true
   205			}
   206	
   207			if s == "" {
   208				break
   209			}
   210			if node.table != nil {
   211				index := r.mapping[s[0]]
   212				if int(index) == r.tableSize {
   213					break
   214				}
   215				node = node.table[index]
   216				s = s[1:]
   217				n++
   218			} else if node.prefix != "" && HasPrefix(s, node.prefix) {
   219				n += len(node.prefix)
   220				s = s[len(node.prefix):]
   221				node = node.next
   222			} else {
   223				break
   224			}
   225		}
   226		return
   227	}
   228	
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
	// root is the root of the trie holding every key; makeGenericReplacer
	// always gives it a lookup table.
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	// Bytes that appear in no key are assigned byte(tableSize) by
	// makeGenericReplacer.
	mapping [256]byte
}
   239	
   240	func makeGenericReplacer(oldnew []string) *genericReplacer {
   241		r := new(genericReplacer)
   242		// Find each byte used, then assign them each an index.
   243		for i := 0; i < len(oldnew); i += 2 {
   244			key := oldnew[i]
   245			for j := 0; j < len(key); j++ {
   246				r.mapping[key[j]] = 1
   247			}
   248		}
   249	
   250		for _, b := range r.mapping {
   251			r.tableSize += int(b)
   252		}
   253	
   254		var index byte
   255		for i, b := range r.mapping {
   256			if b == 0 {
   257				r.mapping[i] = byte(r.tableSize)
   258			} else {
   259				r.mapping[i] = index
   260				index++
   261			}
   262		}
   263		// Ensure root node uses a lookup table (for performance).
   264		r.root.table = make([]*trieNode, r.tableSize)
   265	
   266		for i := 0; i < len(oldnew); i += 2 {
   267			r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
   268		}
   269		return r
   270	}
   271	
   272	type appendSliceWriter []byte
   273	
   274	// Write writes to the buffer to satisfy io.Writer.
   275	func (w *appendSliceWriter) Write(p []byte) (int, error) {
   276		*w = append(*w, p...)
   277		return len(p), nil
   278	}
   279	
   280	// WriteString writes to the buffer without string->[]byte->string allocations.
   281	func (w *appendSliceWriter) WriteString(s string) (int, error) {
   282		*w = append(*w, s...)
   283		return len(s), nil
   284	}
   285	
   286	type stringWriterIface interface {
   287		WriteString(string) (int, error)
   288	}
   289	
   290	type stringWriter struct {
   291		w io.Writer
   292	}
   293	
   294	func (w stringWriter) WriteString(s string) (int, error) {
   295		return w.w.Write([]byte(s))
   296	}
   297	
   298	func getStringWriter(w io.Writer) stringWriterIface {
   299		sw, ok := w.(stringWriterIface)
   300		if !ok {
   301			sw = stringWriter{w}
   302		}
   303		return sw
   304	}
   305	
// Replace returns a copy of s with all replacements performed. It runs
// WriteString into an in-memory buffer whose initial capacity is len(s).
// The result of WriteString is deliberately ignored: appendSliceWriter's
// write methods always return a nil error.
func (r *genericReplacer) Replace(s string) string {
	buf := make(appendSliceWriter, 0, len(s))
	r.WriteString(&buf, s)
	return string(buf)
}
   311	
   312	func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   313		sw := getStringWriter(w)
   314		var last, wn int
   315		var prevMatchEmpty bool
   316		for i := 0; i <= len(s); {
   317			// Fast path: s[i] is not a prefix of any pattern.
   318			if i != len(s) && r.root.priority == 0 {
   319				index := int(r.mapping[s[i]])
   320				if index == r.tableSize || r.root.table[index] == nil {
   321					i++
   322					continue
   323				}
   324			}
   325	
   326			// Ignore the empty match iff the previous loop found the empty match.
   327			val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
   328			prevMatchEmpty = match && keylen == 0
   329			if match {
   330				wn, err = sw.WriteString(s[last:i])
   331				n += wn
   332				if err != nil {
   333					return
   334				}
   335				wn, err = sw.WriteString(val)
   336				n += wn
   337				if err != nil {
   338					return
   339				}
   340				i += keylen
   341				last = i
   342				continue
   343			}
   344			i++
   345		}
   346		if last != len(s) {
   347			wn, err = sw.WriteString(s[last:])
   348			n += wn
   349		}
   350		return
   351	}
   352	
// singleStringReplacer is the implementation that's used when there is only
// one string to replace (and that string has more than one byte).
type singleStringReplacer struct {
	// finder searches for the pattern; it is built from the pattern by
	// makeStringFinder, and its pattern field is read to get the pattern's
	// length when skipping past a match.
	finder *stringFinder
	// value is the new string that replaces that pattern when it's found.
	value string
}
   360	
   361	func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
   362		return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
   363	}
   364	
   365	func (r *singleStringReplacer) Replace(s string) string {
   366		var buf []byte
   367		i, matched := 0, false
   368		for {
   369			match := r.finder.next(s[i:])
   370			if match == -1 {
   371				break
   372			}
   373			matched = true
   374			buf = append(buf, s[i:i+match]...)
   375			buf = append(buf, r.value...)
   376			i += match + len(r.finder.pattern)
   377		}
   378		if !matched {
   379			return s
   380		}
   381		buf = append(buf, s[i:]...)
   382		return string(buf)
   383	}
   384	
   385	func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   386		sw := getStringWriter(w)
   387		var i, wn int
   388		for {
   389			match := r.finder.next(s[i:])
   390			if match == -1 {
   391				break
   392			}
   393			wn, err = sw.WriteString(s[i : i+match])
   394			n += wn
   395			if err != nil {
   396				return
   397			}
   398			wn, err = sw.WriteString(r.value)
   399			n += wn
   400			if err != nil {
   401				return
   402			}
   403			i += match + len(r.finder.pattern)
   404		}
   405		wn, err = sw.WriteString(s[i:])
   406		n += wn
   407		return
   408	}
   409	
   410	// byteReplacer is the implementation that's used when all the "old"
   411	// and "new" values are single ASCII bytes.
   412	// The array contains replacement bytes indexed by old byte.
   413	type byteReplacer [256]byte
   414	
   415	func (r *byteReplacer) Replace(s string) string {
   416		var buf []byte // lazily allocated
   417		for i := 0; i < len(s); i++ {
   418			b := s[i]
   419			if r[b] != b {
   420				if buf == nil {
   421					buf = []byte(s)
   422				}
   423				buf[i] = r[b]
   424			}
   425		}
   426		if buf == nil {
   427			return s
   428		}
   429		return string(buf)
   430	}
   431	
   432	func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   433		// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
   434		bufsize := 32 << 10
   435		if len(s) < bufsize {
   436			bufsize = len(s)
   437		}
   438		buf := make([]byte, bufsize)
   439	
   440		for len(s) > 0 {
   441			ncopy := copy(buf, s[:])
   442			s = s[ncopy:]
   443			for i, b := range buf[:ncopy] {
   444				buf[i] = r[b]
   445			}
   446			wn, err := w.Write(buf[:ncopy])
   447			n += wn
   448			if err != nil {
   449				return n, err
   450			}
   451		}
   452		return n, nil
   453	}
   454	
   455	// byteStringReplacer is the implementation that's used when all the
   456	// "old" values are single ASCII bytes but the "new" values vary in size.
   457	// The array contains replacement byte slices indexed by old byte.
   458	// A nil []byte means that the old byte should not be replaced.
   459	type byteStringReplacer [256][]byte
   460	
   461	func (r *byteStringReplacer) Replace(s string) string {
   462		newSize := len(s)
   463		anyChanges := false
   464		for i := 0; i < len(s); i++ {
   465			b := s[i]
   466			if r[b] != nil {
   467				anyChanges = true
   468				// The -1 is because we are replacing 1 byte with len(r[b]) bytes.
   469				newSize += len(r[b]) - 1
   470			}
   471		}
   472		if !anyChanges {
   473			return s
   474		}
   475		buf := make([]byte, newSize)
   476		bi := buf
   477		for i := 0; i < len(s); i++ {
   478			b := s[i]
   479			if r[b] != nil {
   480				n := copy(bi, r[b])
   481				bi = bi[n:]
   482			} else {
   483				bi[0] = b
   484				bi = bi[1:]
   485			}
   486		}
   487		return string(buf)
   488	}
   489	
   490	func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
   491		sw := getStringWriter(w)
   492		last := 0
   493		for i := 0; i < len(s); i++ {
   494			b := s[i]
   495			if r[b] == nil {
   496				continue
   497			}
   498			if last != i {
   499				nw, err := sw.WriteString(s[last:i])
   500				n += nw
   501				if err != nil {
   502					return n, err
   503				}
   504			}
   505			last = i + 1
   506			nw, err := w.Write(r[b])
   507			n += nw
   508			if err != nil {
   509				return n, err
   510			}
   511		}
   512		if last != len(s) {
   513			var nw int
   514			nw, err = sw.WriteString(s[last:])
   515			n += nw
   516		}
   517		return
   518	}
   519	

View as plain text