...
Run Format

Source file src/strings/replace.go

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package strings
  
  import "io"
  
  // Replacer replaces a list of strings with replacements.
  // It is safe for concurrent use by multiple goroutines.
  type Replacer struct {
  	r replacer // replacement algorithm selected by NewReplacer
  }
  
  // replacer is the interface that a replacement algorithm needs to implement.
  type replacer interface {
  	// Replace returns s with the algorithm's replacements applied.
  	Replace(s string) string
  	// WriteString writes s to w with the algorithm's replacements applied.
  	// It returns the number of bytes written and any write error.
  	WriteString(w io.Writer, s string) (n int, err error)
  }
  
  // NewReplacer returns a new Replacer from a list of old, new string pairs.
  // Replacements are performed in order, without overlapping matches.
  func NewReplacer(oldnew ...string) *Replacer {
  	if len(oldnew)%2 == 1 {
  		panic("strings.NewReplacer: odd argument count")
  	}
  
  	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
  		return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
  	}
  
  	allNewBytes := true
  	for i := 0; i < len(oldnew); i += 2 {
  		if len(oldnew[i]) != 1 {
  			return &Replacer{r: makeGenericReplacer(oldnew)}
  		}
  		if len(oldnew[i+1]) != 1 {
  			allNewBytes = false
  		}
  	}
  
  	if allNewBytes {
  		r := byteReplacer{}
  		for i := range r {
  			r[i] = byte(i)
  		}
  		// The first occurrence of old->new map takes precedence
  		// over the others with the same old string.
  		for i := len(oldnew) - 2; i >= 0; i -= 2 {
  			o := oldnew[i][0]
  			n := oldnew[i+1][0]
  			r[o] = n
  		}
  		return &Replacer{r: &r}
  	}
  
  	r := byteStringReplacer{}
  	// The first occurrence of old->new map takes precedence
  	// over the others with the same old string.
  	for i := len(oldnew) - 2; i >= 0; i -= 2 {
  		o := oldnew[i][0]
  		n := oldnew[i+1]
  		r[o] = []byte(n)
  	}
  	return &Replacer{r: &r}
  }
  
  // Replace returns a copy of s with all replacements performed.
  // The work is delegated to the replacement algorithm stored in r.r.
  func (r *Replacer) Replace(s string) string {
  	return r.r.Replace(s)
  }
  
  // WriteString writes s to w with all replacements performed.
  // The work, including the returned byte count and error, is delegated to
  // the replacement algorithm stored in r.r.
  func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
  	return r.r.WriteString(w, s)
  }
  
  // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
  // and values may be empty. For example, the trie containing keys "ax", "ay",
  // "bcbc", "x" and "xy" could have eight nodes:
  //
  //  n0  -
  //  n1  a-
  //  n2  .x+
  //  n3  .y+
  //  n4  b-
  //  n5  .cbc+
  //  n6  x+
  //  n7  .y+
  //
  // n0 is the root node, and its children are n1, n4 and n6; n1's children are
  // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
  // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
  // (marked with a trailing "+") are complete keys.
  type trieNode struct {
  	// value is the value of the trie node's key/value pair. It is empty if
  	// this node is not a complete key.
  	value string
  	// priority is the priority (higher is more important) of the trie node's
  	// key/value pair; keys are not necessarily matched shortest- or longest-
  	// first. Priority is positive if this node is a complete key, and zero
  	// otherwise. In the example above, positive/zero priorities are marked
  	// with a trailing "+" or "-".
  	priority int

  	// A trie node may have zero, one or more child nodes:
  	//  * if the remaining fields are zero, there are no children.
  	//  * if prefix and next are non-zero, there is one child in next.
  	//  * if table is non-zero, it defines all the children.
  	//
  	// Prefixes are preferred over tables when there is one child, but the
  	// root node always uses a table for lookup efficiency.

  	// prefix is the difference in keys between this trie node and the next.
  	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
  	// Node n5 has no children and so has zero prefix, next and table fields.
  	prefix string
  	// next is the single child reached after matching prefix.
  	next   *trieNode

  	// table is a lookup table indexed by the next byte in the key, after
  	// remapping that byte through genericReplacer.mapping to create a dense
  	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
  	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
  	// genericReplacer.tableSize will be 5. Node n0's table will be
  	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
  	// 'a', 'b' and 'x'. A nil entry means that no key continues with the
  	// corresponding byte.
  	table []*trieNode
  }
  
  // add inserts a key/value pair into the trie rooted at t. key is the part
  // of the full key that has not yet been consumed on the path down to t, val
  // and priority are stored on the node where key runs out, and r supplies
  // the tableSize and mapping used to create and index lookup tables.
  // A node whose priority is already non-zero keeps its existing value and
  // priority.
  func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
  	if key == "" {
  		// The whole key has been consumed, so t is the node for this key.
  		if t.priority == 0 {
  			t.value = val
  			t.priority = priority
  		}
  		return
  	}

  	if t.prefix != "" {
  		// Need to split the prefix among multiple nodes.
  		var n int // length of the longest common prefix
  		for ; n < len(t.prefix) && n < len(key); n++ {
  			if t.prefix[n] != key[n] {
  				break
  			}
  		}
  		if n == len(t.prefix) {
  			// key starts with the entire prefix: continue in the child with
  			// whatever is left of key.
  			t.next.add(key[n:], val, priority, r)
  		} else if n == 0 {
  			// First byte differs, start a new lookup table here. Looking up
  			// what is currently t.prefix[0] will lead to prefixNode, and
  			// looking up key[0] will lead to keyNode.
  			var prefixNode *trieNode
  			if len(t.prefix) == 1 {
  				// The table index consumes the only prefix byte, so the
  				// existing child can be reached directly.
  				prefixNode = t.next
  			} else {
  				// Keep the rest of the prefix in a new intermediate node.
  				prefixNode = &trieNode{
  					prefix: t.prefix[1:],
  					next:   t.next,
  				}
  			}
  			keyNode := new(trieNode)
  			t.table = make([]*trieNode, r.tableSize)
  			t.table[r.mapping[t.prefix[0]]] = prefixNode
  			t.table[r.mapping[key[0]]] = keyNode
  			// t now branches through its table, so it no longer has a prefix.
  			t.prefix = ""
  			t.next = nil
  			keyNode.add(key[1:], val, priority, r)
  		} else {
  			// Insert new node after the common section of the prefix.
  			next := &trieNode{
  				prefix: t.prefix[n:],
  				next:   t.next,
  			}
  			t.prefix = t.prefix[:n]
  			t.next = next
  			next.add(key[n:], val, priority, r)
  		}
  	} else if t.table != nil {
  		// Insert into existing table.
  		m := r.mapping[key[0]]
  		if t.table[m] == nil {
  			t.table[m] = new(trieNode)
  		}
  		t.table[m].add(key[1:], val, priority, r)
  	} else {
  		// t has no children yet: store the whole remaining key as the prefix
  		// and put the pair on a fresh child node.
  		t.prefix = key
  		t.next = new(trieNode)
  		t.next.add("", val, priority, r)
  	}
  }
  
  func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
  	// Iterate down the trie to the end, and grab the value and keylen with
  	// the highest priority.
  	bestPriority := 0
  	node := &r.root
  	n := 0
  	for node != nil {
  		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
  			bestPriority = node.priority
  			val = node.value
  			keylen = n
  			found = true
  		}
  
  		if s == "" {
  			break
  		}
  		if node.table != nil {
  			index := r.mapping[s[0]]
  			if int(index) == r.tableSize {
  				break
  			}
  			node = node.table[index]
  			s = s[1:]
  			n++
  		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
  			n += len(node.prefix)
  			s = s[len(node.prefix):]
  			node = node.next
  		} else {
  			break
  		}
  	}
  	return
  }
  
  // genericReplacer is the fully generic algorithm.
  // It's used as a fallback when nothing faster can be used.
  type genericReplacer struct {
  	// root is the root node of the key trie. makeGenericReplacer always
  	// gives it a lookup table.
  	root trieNode
  	// tableSize is the size of a trie node's lookup table. It is the number
  	// of unique key bytes.
  	tableSize int
  	// mapping maps from key bytes to a dense index for trieNode.table.
  	// makeGenericReplacer sets the entry of every byte that appears in no
  	// key to byte(tableSize).
  	mapping [256]byte
  }
  
  func makeGenericReplacer(oldnew []string) *genericReplacer {
  	r := new(genericReplacer)
  	// Find each byte used, then assign them each an index.
  	for i := 0; i < len(oldnew); i += 2 {
  		key := oldnew[i]
  		for j := 0; j < len(key); j++ {
  			r.mapping[key[j]] = 1
  		}
  	}
  
  	for _, b := range r.mapping {
  		r.tableSize += int(b)
  	}
  
  	var index byte
  	for i, b := range r.mapping {
  		if b == 0 {
  			r.mapping[i] = byte(r.tableSize)
  		} else {
  			r.mapping[i] = index
  			index++
  		}
  	}
  	// Ensure root node uses a lookup table (for performance).
  	r.root.table = make([]*trieNode, r.tableSize)
  
  	for i := 0; i < len(oldnew); i += 2 {
  		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
  	}
  	return r
  }
  
  type appendSliceWriter []byte
  
  // Write writes to the buffer to satisfy io.Writer.
  func (w *appendSliceWriter) Write(p []byte) (int, error) {
  	*w = append(*w, p...)
  	return len(p), nil
  }
  
  // WriteString writes to the buffer without string->[]byte->string allocations.
  func (w *appendSliceWriter) WriteString(s string) (int, error) {
  	*w = append(*w, s...)
  	return len(s), nil
  }
  
  // stringWriterIface is implemented by writers that accept a string directly
  // through a WriteString method.
  type stringWriterIface interface {
  	WriteString(string) (int, error)
  }
  
  type stringWriter struct {
  	w io.Writer
  }
  
  func (w stringWriter) WriteString(s string) (int, error) {
  	return w.w.Write([]byte(s))
  }
  
  func getStringWriter(w io.Writer) stringWriterIface {
  	sw, ok := w.(stringWriterIface)
  	if !ok {
  		sw = stringWriter{w}
  	}
  	return sw
  }
  
  func (r *genericReplacer) Replace(s string) string {
  	buf := make(appendSliceWriter, 0, len(s))
  	r.WriteString(&buf, s)
  	return string(buf)
  }
  
  func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  	sw := getStringWriter(w)
  	var last, wn int
  	var prevMatchEmpty bool
  	for i := 0; i <= len(s); {
  		// Fast path: s[i] is not a prefix of any pattern.
  		if i != len(s) && r.root.priority == 0 {
  			index := int(r.mapping[s[i]])
  			if index == r.tableSize || r.root.table[index] == nil {
  				i++
  				continue
  			}
  		}
  
  		// Ignore the empty match iff the previous loop found the empty match.
  		val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
  		prevMatchEmpty = match && keylen == 0
  		if match {
  			wn, err = sw.WriteString(s[last:i])
  			n += wn
  			if err != nil {
  				return
  			}
  			wn, err = sw.WriteString(val)
  			n += wn
  			if err != nil {
  				return
  			}
  			i += keylen
  			last = i
  			continue
  		}
  		i++
  	}
  	if last != len(s) {
  		wn, err = sw.WriteString(s[last:])
  		n += wn
  	}
  	return
  }
  
  // singleStringReplacer is the implementation that's used when there is only
  // one string to replace (and that string has more than one byte).
  type singleStringReplacer struct {
  	// finder searches for the pattern; it is built from the old string by
  	// makeSingleStringReplacer.
  	finder *stringFinder
  	// value is the new string that replaces that pattern when it's found.
  	value string
  }
  
  func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
  	return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
  }
  
  func (r *singleStringReplacer) Replace(s string) string {
  	var buf []byte
  	i, matched := 0, false
  	for {
  		match := r.finder.next(s[i:])
  		if match == -1 {
  			break
  		}
  		matched = true
  		buf = append(buf, s[i:i+match]...)
  		buf = append(buf, r.value...)
  		i += match + len(r.finder.pattern)
  	}
  	if !matched {
  		return s
  	}
  	buf = append(buf, s[i:]...)
  	return string(buf)
  }
  
  func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  	sw := getStringWriter(w)
  	var i, wn int
  	for {
  		match := r.finder.next(s[i:])
  		if match == -1 {
  			break
  		}
  		wn, err = sw.WriteString(s[i : i+match])
  		n += wn
  		if err != nil {
  			return
  		}
  		wn, err = sw.WriteString(r.value)
  		n += wn
  		if err != nil {
  			return
  		}
  		i += match + len(r.finder.pattern)
  	}
  	wn, err = sw.WriteString(s[i:])
  	n += wn
  	return
  }
  
  // byteReplacer is the implementation that's used when all the "old"
  // and "new" values are single ASCII bytes.
  // The array contains replacement bytes indexed by old byte.
  type byteReplacer [256]byte
  
  func (r *byteReplacer) Replace(s string) string {
  	var buf []byte // lazily allocated
  	for i := 0; i < len(s); i++ {
  		b := s[i]
  		if r[b] != b {
  			if buf == nil {
  				buf = []byte(s)
  			}
  			buf[i] = r[b]
  		}
  	}
  	if buf == nil {
  		return s
  	}
  	return string(buf)
  }
  
  func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  	// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
  	bufsize := 32 << 10
  	if len(s) < bufsize {
  		bufsize = len(s)
  	}
  	buf := make([]byte, bufsize)
  
  	for len(s) > 0 {
  		ncopy := copy(buf, s[:])
  		s = s[ncopy:]
  		for i, b := range buf[:ncopy] {
  			buf[i] = r[b]
  		}
  		wn, err := w.Write(buf[:ncopy])
  		n += wn
  		if err != nil {
  			return n, err
  		}
  	}
  	return n, nil
  }
  
  // byteStringReplacer is the implementation that's used when all the
  // "old" values are single ASCII bytes but the "new" values vary in size.
  // The array contains replacement byte slices indexed by old byte.
  // A nil []byte means that the old byte should not be replaced.
  type byteStringReplacer [256][]byte
  
  func (r *byteStringReplacer) Replace(s string) string {
  	newSize := len(s)
  	anyChanges := false
  	for i := 0; i < len(s); i++ {
  		b := s[i]
  		if r[b] != nil {
  			anyChanges = true
  			// The -1 is because we are replacing 1 byte with len(r[b]) bytes.
  			newSize += len(r[b]) - 1
  		}
  	}
  	if !anyChanges {
  		return s
  	}
  	buf := make([]byte, newSize)
  	bi := buf
  	for i := 0; i < len(s); i++ {
  		b := s[i]
  		if r[b] != nil {
  			n := copy(bi, r[b])
  			bi = bi[n:]
  		} else {
  			bi[0] = b
  			bi = bi[1:]
  		}
  	}
  	return string(buf)
  }
  
  func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  	sw := getStringWriter(w)
  	last := 0
  	for i := 0; i < len(s); i++ {
  		b := s[i]
  		if r[b] == nil {
  			continue
  		}
  		if last != i {
  			nw, err := sw.WriteString(s[last:i])
  			n += nw
  			if err != nil {
  				return n, err
  			}
  		}
  		last = i + 1
  		nw, err := w.Write(r[b])
  		n += nw
  		if err != nil {
  			return n, err
  		}
  	}
  	if last != len(s) {
  		var nw int
  		nw, err = sw.WriteString(s[last:])
  		n += nw
  	}
  	return
  }
  

View as plain text