...
Run Format

Source file src/mime/encodedword.go

Documentation: mime

  // Copyright 2015 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package mime
  
  import (
  	"bytes"
  	"encoding/base64"
  	"errors"
  	"fmt"
  	"io"
  	"strings"
  	"sync"
  	"unicode"
  	"unicode/utf8"
  )
  
  // A WordEncoder is an RFC 2047 encoded-word encoder.
  type WordEncoder byte
  
  const (
  	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
  	BEncoding = WordEncoder('b')
  	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
  	QEncoding = WordEncoder('q')
  )
  
  var (
  	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
  )
  
  // Encode returns the encoded-word form of s. If s is ASCII without special
  // characters, it is returned unchanged. The provided charset is the IANA
  // charset name of s. It is case insensitive.
  func (e WordEncoder) Encode(charset, s string) string {
  	if !needsEncoding(s) {
  		return s
  	}
  	return e.encodeWord(charset, s)
  }
  
  func needsEncoding(s string) bool {
  	for _, b := range s {
  		if (b < ' ' || b > '~') && b != '\t' {
  			return true
  		}
  	}
  	return false
  }
  
  // encodeWord encodes a string into an encoded-word.
  func (e WordEncoder) encodeWord(charset, s string) string {
  	buf := getBuffer()
  	defer putBuffer(buf)
  
  	e.openWord(buf, charset)
  	if e == BEncoding {
  		e.bEncode(buf, charset, s)
  	} else {
  		e.qEncode(buf, charset, s)
  	}
  	closeWord(buf)
  
  	return buf.String()
  }
  
  const (
  	// The maximum length of an encoded-word is 75 characters.
  	// See RFC 2047, section 2.
  	maxEncodedWordLen = 75
  	// maxContentLen is how much content can be encoded, ignoring the header and
  	// 2-byte footer.
  	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
  )
  
  var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)
  
  // bEncode encodes s using base64 encoding and writes it to buf.
  func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
  	w := base64.NewEncoder(base64.StdEncoding, buf)
  	// If the charset is not UTF-8 or if the content is short, do not bother
  	// splitting the encoded-word.
  	if !isUTF8(charset) || base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
  		io.WriteString(w, s)
  		w.Close()
  		return
  	}
  
  	var currentLen, last, runeLen int
  	for i := 0; i < len(s); i += runeLen {
  		// Multi-byte characters must not be split across encoded-words.
  		// See RFC 2047, section 5.3.
  		_, runeLen = utf8.DecodeRuneInString(s[i:])
  
  		if currentLen+runeLen <= maxBase64Len {
  			currentLen += runeLen
  		} else {
  			io.WriteString(w, s[last:i])
  			w.Close()
  			e.splitWord(buf, charset)
  			last = i
  			currentLen = runeLen
  		}
  	}
  	io.WriteString(w, s[last:])
  	w.Close()
  }
  
  // qEncode encodes s using Q encoding and writes it to buf. It splits the
  // encoded-words when necessary.
  func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
  	// We only split encoded-words when the charset is UTF-8.
  	if !isUTF8(charset) {
  		writeQString(buf, s)
  		return
  	}
  
  	var currentLen, runeLen int
  	for i := 0; i < len(s); i += runeLen {
  		b := s[i]
  		// Multi-byte characters must not be split across encoded-words.
  		// See RFC 2047, section 5.3.
  		var encLen int
  		if b >= ' ' && b <= '~' && b != '=' && b != '?' && b != '_' {
  			runeLen, encLen = 1, 1
  		} else {
  			_, runeLen = utf8.DecodeRuneInString(s[i:])
  			encLen = 3 * runeLen
  		}
  
  		if currentLen+encLen > maxContentLen {
  			e.splitWord(buf, charset)
  			currentLen = 0
  		}
  		writeQString(buf, s[i:i+runeLen])
  		currentLen += encLen
  	}
  }
  
  // writeQString encodes s using Q encoding and writes it to buf.
  func writeQString(buf *bytes.Buffer, s string) {
  	for i := 0; i < len(s); i++ {
  		switch b := s[i]; {
  		case b == ' ':
  			buf.WriteByte('_')
  		case b >= '!' && b <= '~' && b != '=' && b != '?' && b != '_':
  			buf.WriteByte(b)
  		default:
  			buf.WriteByte('=')
  			buf.WriteByte(upperhex[b>>4])
  			buf.WriteByte(upperhex[b&0x0f])
  		}
  	}
  }
  
  // openWord writes the beginning of an encoded-word into buf.
  func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
  	buf.WriteString("=?")
  	buf.WriteString(charset)
  	buf.WriteByte('?')
  	buf.WriteByte(byte(e))
  	buf.WriteByte('?')
  }
  
  // closeWord writes the end of an encoded-word into buf.
  func closeWord(buf *bytes.Buffer) {
  	buf.WriteString("?=")
  }
  
  // splitWord closes the current encoded-word and opens a new one.
  func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
  	closeWord(buf)
  	buf.WriteByte(' ')
  	e.openWord(buf, charset)
  }
  
  func isUTF8(charset string) bool {
  	return strings.EqualFold(charset, "UTF-8")
  }
  
  // upperhex maps a 4-bit value (0-15), used as an index, to its uppercase
  // hexadecimal digit.
  const upperhex = "0123456789ABCDEF"
  
  // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
  // With a nil CharsetReader only the utf-8, iso-8859-1 and us-ascii charsets
  // can be decoded.
  type WordDecoder struct {
  	// CharsetReader, if non-nil, defines a function to generate
  	// charset-conversion readers, converting from the provided
  	// charset into UTF-8.
  	// The charset name passed to it is always lower-case. It is not called
  	// for the utf-8, iso-8859-1 and us-ascii charsets, which are handled by
  	// default.
  	// One of the CharsetReader's result values must be non-nil.
  	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
  }
  
  // Decode decodes an RFC 2047 encoded-word.
  func (d *WordDecoder) Decode(word string) (string, error) {
  	// See https://tools.ietf.org/html/rfc2047#section-2 for details.
  	// Our decoder is permissive, we accept empty encoded-text.
  	if len(word) < 8 || !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
  		return "", errInvalidWord
  	}
  	word = word[2 : len(word)-2]
  
  	// split delimits the first 2 fields
  	split := strings.IndexByte(word, '?')
  
  	// split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
  	charset := word[:split]
  	if len(charset) == 0 {
  		return "", errInvalidWord
  	}
  	if len(word) < split+3 {
  		return "", errInvalidWord
  	}
  	encoding := word[split+1]
  	// the field after split must only be one byte
  	if word[split+2] != '?' {
  		return "", errInvalidWord
  	}
  	text := word[split+3:]
  
  	content, err := decode(encoding, text)
  	if err != nil {
  		return "", err
  	}
  
  	buf := getBuffer()
  	defer putBuffer(buf)
  
  	if err := d.convert(buf, charset, content); err != nil {
  		return "", err
  	}
  
  	return buf.String(), nil
  }
  
  // DecodeHeader decodes all encoded-words of the given string. Text that is
  // not part of a decodable encoded-word is copied to the result as is, except
  // that white space separating two consecutive decoded encoded-words is
  // dropped.
  //
  // Malformed or undecodable encoded-words are not reported as errors. An
  // error is returned only when the charset conversion of a decoded word
  // fails (see convert), for instance when the CharsetReader of d returns an
  // error.
  func (d *WordDecoder) DecodeHeader(header string) (string, error) {
  	// If there is no encoded-word, returns before creating a buffer.
  	i := strings.Index(header, "=?")
  	if i == -1 {
  		return header, nil
  	}
  
  	buf := getBuffer()
  	defer putBuffer(buf)
  
  	// Everything before the first "=?" is plain text.
  	buf.WriteString(header[:i])
  	header = header[i:]
  
  	// betweenWords is true when the last thing written to buf was a
  	// successfully decoded encoded-word.
  	betweenWords := false
  	for {
  		// Each iteration tries to parse "=?charset?encoding?text?=" from the
  		// remaining header. Any break leaves the rest of header to be copied
  		// verbatim after the loop.
  		start := strings.Index(header, "=?")
  		if start == -1 {
  			break
  		}
  		cur := start + len("=?")
  
  		// The charset runs up to the next '?'.
  		i := strings.Index(header[cur:], "?")
  		if i == -1 {
  			break
  		}
  		charset := header[cur : cur+i]
  		cur += i + len("?")
  
  		// At least an encoding byte, a '?' and the closing "?=" must remain.
  		if len(header) < cur+len("Q??=") {
  			break
  		}
  		encoding := header[cur]
  		cur++
  
  		// The encoding field must be exactly one byte long.
  		if header[cur] != '?' {
  			break
  		}
  		cur++
  
  		// The encoded text runs up to the closing "?=".
  		j := strings.Index(header[cur:], "?=")
  		if j == -1 {
  			break
  		}
  		text := header[cur : cur+j]
  		end := cur + j + len("?=")
  
  		content, err := decode(encoding, text)
  		if err != nil {
  			// Not a decodable encoded-word: copy everything up to and
  			// including this "=?" as plain text and resume scanning right
  			// after it.
  			betweenWords = false
  			buf.WriteString(header[:start+2])
  			header = header[start+2:]
  			continue
  		}
  
  		// Write characters before the encoded-word. White-space and newline
  		// characters separating two encoded-words must be deleted.
  		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
  			buf.WriteString(header[:start])
  		}
  
  		if err := d.convert(buf, charset, content); err != nil {
  			return "", err
  		}
  
  		header = header[end:]
  		betweenWords = true
  	}
  
  	// Copy whatever is left after the last decoded encoded-word.
  	if len(header) > 0 {
  		buf.WriteString(header)
  	}
  
  	return buf.String(), nil
  }
  
  func decode(encoding byte, text string) ([]byte, error) {
  	switch encoding {
  	case 'B', 'b':
  		return base64.StdEncoding.DecodeString(text)
  	case 'Q', 'q':
  		return qDecode(text)
  	default:
  		return nil, errInvalidWord
  	}
  }
  
  func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
  	switch {
  	case strings.EqualFold("utf-8", charset):
  		buf.Write(content)
  	case strings.EqualFold("iso-8859-1", charset):
  		for _, c := range content {
  			buf.WriteRune(rune(c))
  		}
  	case strings.EqualFold("us-ascii", charset):
  		for _, c := range content {
  			if c >= utf8.RuneSelf {
  				buf.WriteRune(unicode.ReplacementChar)
  			} else {
  				buf.WriteByte(c)
  			}
  		}
  	default:
  		if d.CharsetReader == nil {
  			return fmt.Errorf("mime: unhandled charset %q", charset)
  		}
  		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
  		if err != nil {
  			return err
  		}
  		if _, err = buf.ReadFrom(r); err != nil {
  			return err
  		}
  	}
  	return nil
  }
  
  // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
  // one byte of non-whitespace.
  func hasNonWhitespace(s string) bool {
  	for _, b := range s {
  		switch b {
  		// Encoded-words can only be separated by linear white spaces which does
  		// not include vertical tabs (\v).
  		case ' ', '\t', '\n', '\r':
  		default:
  			return true
  		}
  	}
  	return false
  }
  
  // qDecode decodes a Q encoded string.
  func qDecode(s string) ([]byte, error) {
  	dec := make([]byte, len(s))
  	n := 0
  	for i := 0; i < len(s); i++ {
  		switch c := s[i]; {
  		case c == '_':
  			dec[n] = ' '
  		case c == '=':
  			if i+2 >= len(s) {
  				return nil, errInvalidWord
  			}
  			b, err := readHexByte(s[i+1], s[i+2])
  			if err != nil {
  				return nil, err
  			}
  			dec[n] = b
  			i += 2
  		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
  			dec[n] = c
  		default:
  			return nil, errInvalidWord
  		}
  		n++
  	}
  
  	return dec[:n], nil
  }
  
  // readHexByte returns the byte from its quoted-printable representation.
  func readHexByte(a, b byte) (byte, error) {
  	var hb, lb byte
  	var err error
  	if hb, err = fromHex(a); err != nil {
  		return 0, err
  	}
  	if lb, err = fromHex(b); err != nil {
  		return 0, err
  	}
  	return hb<<4 | lb, nil
  }
  
  func fromHex(b byte) (byte, error) {
  	switch {
  	case b >= '0' && b <= '9':
  		return b - '0', nil
  	case b >= 'A' && b <= 'F':
  		return b - 'A' + 10, nil
  	// Accept badly encoded bytes.
  	case b >= 'a' && b <= 'f':
  		return b - 'a' + 10, nil
  	}
  	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
  }
  
  var bufPool = sync.Pool{
  	New: func() interface{} {
  		return new(bytes.Buffer)
  	},
  }
  
  func getBuffer() *bytes.Buffer {
  	return bufPool.Get().(*bytes.Buffer)
  }
  
  func putBuffer(buf *bytes.Buffer) {
  	if buf.Len() > 1024 {
  		return
  	}
  	buf.Reset()
  	bufPool.Put(buf)
  }
  

View as plain text