...
Run Format

Source file src/net/mail/message.go

Documentation: net/mail

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  /*
  Package mail implements parsing of mail messages.
  
  For the most part, this package follows the syntax as specified by RFC 5322 and
  extended by RFC 6532.
  Notable divergences:
  	* Obsolete address formats are not parsed, including addresses with
  	  embedded route information.
  	* Group addresses are not parsed.
  	* The full range of spacing (the CFWS syntax element) is not supported,
  	  such as breaking addresses across lines.
  	* No unicode normalization is performed.
  */
  package mail
  
  import (
  	"bufio"
  	"bytes"
  	"errors"
  	"fmt"
  	"io"
  	"log"
  	"mime"
  	"net/textproto"
  	"strings"
  	"time"
  	"unicode/utf8"
  )
  
  var debug = debugT(false)
  
  type debugT bool
  
  func (d debugT) Printf(format string, args ...interface{}) {
  	if d {
  		log.Printf(format, args...)
  	}
  }
  
  // A Message represents a parsed mail message.
  // ReadMessage fills in both fields.
  type Message struct {
  	// Header holds the parsed header fields.
  	Header Header
  	// Body yields the input that follows the header.
  	Body   io.Reader
  }
  
  // ReadMessage reads a message from r.
  // The headers are parsed, and the body of the message will be available
  // for reading from msg.Body.
  func ReadMessage(r io.Reader) (msg *Message, err error) {
  	tp := textproto.NewReader(bufio.NewReader(r))
  
  	hdr, err := tp.ReadMIMEHeader()
  	if err != nil {
  		return nil, err
  	}
  
  	return &Message{
  		Header: Header(hdr),
  		Body:   tp.R,
  	}, nil
  }
  
  // Layouts suitable for passing to time.Parse.
  // These are tried in order.
  var dateLayouts []string
  
  func init() {
  	// Generate layouts based on RFC 5322, section 3.3.
  
  	dows := [...]string{"", "Mon, "}   // day-of-week
  	days := [...]string{"2", "02"}     // day = 1*2DIGIT
  	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
  	seconds := [...]string{":05", ""}  // second
  	// "-0700 (MST)" is not in RFC 5322, but is common.
  	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
  
  	for _, dow := range dows {
  		for _, day := range days {
  			for _, year := range years {
  				for _, second := range seconds {
  					for _, zone := range zones {
  						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
  						dateLayouts = append(dateLayouts, s)
  					}
  				}
  			}
  		}
  	}
  }
  
  // ParseDate parses an RFC 5322 date string.
  func ParseDate(date string) (time.Time, error) {
  	for _, layout := range dateLayouts {
  		t, err := time.Parse(layout, date)
  		if err == nil {
  			return t, nil
  		}
  	}
  	return time.Time{}, errors.New("mail: header could not be parsed")
  }
  
  // A Header represents the key-value pairs in a mail message header.
  type Header map[string][]string
  
  // Get gets the first value associated with the given key.
  // It is case insensitive; CanonicalMIMEHeaderKey is used
  // to canonicalize the provided key.
  // If there are no values associated with the key, Get returns "".
  // To access multiple values of a key, or to use non-canonical keys,
  // access the map directly.
  func (h Header) Get(key string) string {
  	return textproto.MIMEHeader(h).Get(key)
  }
  
  // ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
  // when Get yields "" for the requested header field.
  var ErrHeaderNotPresent = errors.New("mail: header not in message")
  
  // Date parses the Date header field.
  func (h Header) Date() (time.Time, error) {
  	hdr := h.Get("Date")
  	if hdr == "" {
  		return time.Time{}, ErrHeaderNotPresent
  	}
  	return ParseDate(hdr)
  }
  
  // AddressList parses the named header field as a list of addresses.
  func (h Header) AddressList(key string) ([]*Address, error) {
  	hdr := h.Get(key)
  	if hdr == "" {
  		return nil, ErrHeaderNotPresent
  	}
  	return ParseAddressList(hdr)
  }
  
  // Address represents a single mail address.
  // An address such as "Barry Gibbs <bg@example.com>" is represented
  // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
  // The String method renders the value back into RFC 5322 form.
  type Address struct {
  	Name    string // Proper name; may be empty.
  	Address string // user@domain, without surrounding angle brackets.
  }
  
  // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
  func ParseAddress(address string) (*Address, error) {
  	return (&addrParser{s: address}).parseSingleAddress()
  }
  
  // ParseAddressList parses the given string as a list of addresses.
  func ParseAddressList(list string) ([]*Address, error) {
  	return (&addrParser{s: list}).parseAddressList()
  }
  
  // An AddressParser is an RFC 5322 address parser.
  // Its Parse and ParseList methods hand WordDecoder to the internal
  // parser for each call.
  type AddressParser struct {
  	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
  	// When it is nil, the package-level rfc2047Decoder is used instead.
  	WordDecoder *mime.WordDecoder
  }
  
  // Parse parses a single RFC 5322 address of the
  // form "Gogh Fir <gf@example.com>" or "foo@example.com".
  func (p *AddressParser) Parse(address string) (*Address, error) {
  	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
  }
  
  // ParseList parses the given string as a list of comma-separated addresses
  // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
  func (p *AddressParser) ParseList(list string) ([]*Address, error) {
  	return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
  }
  
  // String formats the address as a valid RFC 5322 address.
  // If the address's name contains non-ASCII characters
  // the name will be rendered according to RFC 2047.
  func (a *Address) String() string {
  	// Format address local@domain
  	at := strings.LastIndex(a.Address, "@")
  	var local, domain string
  	if at < 0 {
  		// This is a malformed address ("@" is required in addr-spec);
  		// treat the whole address as local-part.
  		local = a.Address
  	} else {
  		local, domain = a.Address[:at], a.Address[at+1:]
  	}
  
  	// Add quotes if needed
  	quoteLocal := false
  	for i, r := range local {
  		if isAtext(r, false) {
  			continue
  		}
  		if r == '.' {
  			// Dots are okay if they are surrounded by atext.
  			// We only need to check that the previous byte is
  			// not a dot, and this isn't the end of the string.
  			if i > 0 && local[i-1] != '.' && i < len(local)-1 {
  				continue
  			}
  		}
  		quoteLocal = true
  		break
  	}
  	if quoteLocal {
  		local = quoteString(local)
  
  	}
  
  	s := "<" + local + "@" + domain + ">"
  
  	if a.Name == "" {
  		return s
  	}
  
  	// If every character is printable ASCII, quoting is simple.
  	allPrintable := true
  	for _, r := range a.Name {
  		// isWSP here should actually be isFWS,
  		// but we don't support folding yet.
  		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
  			allPrintable = false
  			break
  		}
  	}
  	if allPrintable {
  		return quoteString(a.Name) + " " + s
  	}
  
  	// Text in an encoded-word in a display-name must not contain certain
  	// characters like quotes or parentheses (see RFC 2047 section 5.3).
  	// When this is the case encode the name using base64 encoding.
  	if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
  		return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
  	}
  	return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
  }
  
  // addrParser carries the state of one address parse.
  // Its consume* methods remove what they parse from the front of s.
  type addrParser struct {
  	s   string            // input that has not been consumed yet
  	dec *mime.WordDecoder // may be nil
  }
  
  func (p *addrParser) parseAddressList() ([]*Address, error) {
  	var list []*Address
  	for {
  		p.skipSpace()
  		addr, err := p.parseAddress()
  		if err != nil {
  			return nil, err
  		}
  		list = append(list, addr)
  
  		p.skipSpace()
  		if p.empty() {
  			break
  		}
  		if !p.consume(',') {
  			return nil, errors.New("mail: expected comma")
  		}
  	}
  	return list, nil
  }
  
  func (p *addrParser) parseSingleAddress() (*Address, error) {
  	addr, err := p.parseAddress()
  	if err != nil {
  		return nil, err
  	}
  	p.skipSpace()
  	if !p.empty() {
  		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
  	}
  	return addr, nil
  }
  
  // parseAddress parses a single RFC 5322 address at the start of p.
  func (p *addrParser) parseAddress() (addr *Address, err error) {
  	debug.Printf("parseAddress: %q", p.s)
  	p.skipSpace()
  	if p.empty() {
  		return nil, errors.New("mail: no address")
  	}
  
  	// address = name-addr / addr-spec
  	// TODO(dsymonds): Support parsing group address.
  
  	// addr-spec has a more restricted grammar than name-addr,
  	// so try parsing it first, and fallback to name-addr.
  	// TODO(dsymonds): Is this really correct?
  	spec, err := p.consumeAddrSpec()
  	if err == nil {
  		return &Address{
  			Address: spec,
  		}, err
  	}
  	debug.Printf("parseAddress: not an addr-spec: %v", err)
  	debug.Printf("parseAddress: state is now %q", p.s)
  
  	// display-name
  	var displayName string
  	if p.peek() != '<' {
  		displayName, err = p.consumePhrase()
  		if err != nil {
  			return nil, err
  		}
  	}
  	debug.Printf("parseAddress: displayName=%q", displayName)
  
  	// angle-addr = "<" addr-spec ">"
  	p.skipSpace()
  	if !p.consume('<') {
  		return nil, errors.New("mail: no angle-addr")
  	}
  	spec, err = p.consumeAddrSpec()
  	if err != nil {
  		return nil, err
  	}
  	if !p.consume('>') {
  		return nil, errors.New("mail: unclosed angle-addr")
  	}
  	debug.Printf("parseAddress: spec=%q", spec)
  
  	return &Address{
  		Name:    displayName,
  		Address: spec,
  	}, nil
  }
  
  // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
  func (p *addrParser) consumeAddrSpec() (spec string, err error) {
  	debug.Printf("consumeAddrSpec: %q", p.s)
  
  	orig := *p
  	defer func() {
  		if err != nil {
  			*p = orig
  		}
  	}()
  
  	// local-part = dot-atom / quoted-string
  	var localPart string
  	p.skipSpace()
  	if p.empty() {
  		return "", errors.New("mail: no addr-spec")
  	}
  	if p.peek() == '"' {
  		// quoted-string
  		debug.Printf("consumeAddrSpec: parsing quoted-string")
  		localPart, err = p.consumeQuotedString()
  		if localPart == "" {
  			err = errors.New("mail: empty quoted string in addr-spec")
  		}
  	} else {
  		// dot-atom
  		debug.Printf("consumeAddrSpec: parsing dot-atom")
  		localPart, err = p.consumeAtom(true, false)
  	}
  	if err != nil {
  		debug.Printf("consumeAddrSpec: failed: %v", err)
  		return "", err
  	}
  
  	if !p.consume('@') {
  		return "", errors.New("mail: missing @ in addr-spec")
  	}
  
  	// domain = dot-atom / domain-literal
  	var domain string
  	p.skipSpace()
  	if p.empty() {
  		return "", errors.New("mail: no domain in addr-spec")
  	}
  	// TODO(dsymonds): Handle domain-literal
  	domain, err = p.consumeAtom(true, false)
  	if err != nil {
  		return "", err
  	}
  
  	return localPart + "@" + domain, nil
  }
  
  // consumePhrase parses the RFC 5322 phrase at the start of p.
  func (p *addrParser) consumePhrase() (phrase string, err error) {
  	debug.Printf("consumePhrase: [%s]", p.s)
  	// phrase = 1*word
  	var words []string
  	var isPrevEncoded bool
  	for {
  		// word = atom / quoted-string
  		var word string
  		p.skipSpace()
  		if p.empty() {
  			break
  		}
  		isEncoded := false
  		if p.peek() == '"' {
  			// quoted-string
  			word, err = p.consumeQuotedString()
  		} else {
  			// atom
  			// We actually parse dot-atom here to be more permissive
  			// than what RFC 5322 specifies.
  			word, err = p.consumeAtom(true, true)
  			if err == nil {
  				word, isEncoded, err = p.decodeRFC2047Word(word)
  			}
  		}
  
  		if err != nil {
  			break
  		}
  		debug.Printf("consumePhrase: consumed %q", word)
  		if isPrevEncoded && isEncoded {
  			words[len(words)-1] += word
  		} else {
  			words = append(words, word)
  		}
  		isPrevEncoded = isEncoded
  	}
  	// Ignore any error if we got at least one word.
  	if err != nil && len(words) == 0 {
  		debug.Printf("consumePhrase: hit err: %v", err)
  		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
  	}
  	phrase = strings.Join(words, " ")
  	return phrase, nil
  }
  
  // consumeQuotedString parses the quoted string at the start of p.
  func (p *addrParser) consumeQuotedString() (qs string, err error) {
  	// Assume first byte is '"'.
  	i := 1
  	qsb := make([]rune, 0, 10)
  
  	escaped := false
  
  Loop:
  	for {
  		r, size := utf8.DecodeRuneInString(p.s[i:])
  
  		switch {
  		case size == 0:
  			return "", errors.New("mail: unclosed quoted-string")
  
  		case size == 1 && r == utf8.RuneError:
  			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
  
  		case escaped:
  			//  quoted-pair = ("\" (VCHAR / WSP))
  
  			if !isVchar(r) && !isWSP(r) {
  				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
  			}
  
  			qsb = append(qsb, r)
  			escaped = false
  
  		case isQtext(r) || isWSP(r):
  			// qtext (printable US-ASCII excluding " and \), or
  			// FWS (almost; we're ignoring CRLF)
  			qsb = append(qsb, r)
  
  		case r == '"':
  			break Loop
  
  		case r == '\\':
  			escaped = true
  
  		default:
  			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
  
  		}
  
  		i += size
  	}
  	p.s = p.s[i+1:]
  	return string(qsb), nil
  }
  
  // consumeAtom parses an RFC 5322 atom at the start of p.
  // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
  // If permissive is true, consumeAtom will not fail on
  // leading/trailing/double dots in the atom (see golang.org/issue/4938).
  func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
  	i := 0
  
  Loop:
  	for {
  		r, size := utf8.DecodeRuneInString(p.s[i:])
  
  		switch {
  		case size == 1 && r == utf8.RuneError:
  			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
  
  		case size == 0 || !isAtext(r, dot):
  			break Loop
  
  		default:
  			i += size
  
  		}
  	}
  
  	if i == 0 {
  		return "", errors.New("mail: invalid string")
  	}
  	atom, p.s = p.s[:i], p.s[i:]
  	if !permissive {
  		if strings.HasPrefix(atom, ".") {
  			return "", errors.New("mail: leading dot in atom")
  		}
  		if strings.Contains(atom, "..") {
  			return "", errors.New("mail: double dot in atom")
  		}
  		if strings.HasSuffix(atom, ".") {
  			return "", errors.New("mail: trailing dot in atom")
  		}
  	}
  	return atom, nil
  }
  
  func (p *addrParser) consume(c byte) bool {
  	if p.empty() || p.peek() != c {
  		return false
  	}
  	p.s = p.s[1:]
  	return true
  }
  
  // skipSpace skips the leading space and tab characters.
  func (p *addrParser) skipSpace() {
  	p.s = strings.TrimLeft(p.s, " \t")
  }
  
  // peek returns the first byte of the remaining input without consuming it.
  // It indexes p.s[0] directly, so it panics when the input is empty;
  // callers must check empty first.
  func (p *addrParser) peek() byte {
  	return p.s[0]
  }
  
  func (p *addrParser) empty() bool {
  	return p.len() == 0
  }
  
  // len returns the number of bytes of input that remain to be consumed.
  func (p *addrParser) len() int {
  	return len(p.s)
  }
  
  func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
  	if p.dec != nil {
  		word, err = p.dec.Decode(s)
  	} else {
  		word, err = rfc2047Decoder.Decode(s)
  	}
  
  	if err == nil {
  		return word, true, nil
  	}
  
  	if _, ok := err.(charsetError); ok {
  		return s, true, err
  	}
  
  	// Ignore invalid RFC 2047 encoded-word errors.
  	return s, false, nil
  }
  
  // rfc2047Decoder is the decoder decodeRFC2047Word uses when the parser
  // has no decoder of its own. Its CharsetReader performs no conversion:
  // it always fails with a charsetError naming the requested charset.
  var rfc2047Decoder = mime.WordDecoder{
  	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
  		return nil, charsetError(charset)
  	},
  }
  
  type charsetError string
  
  func (e charsetError) Error() string {
  	return fmt.Sprintf("charset not supported: %q", string(e))
  }
  
  // isAtext reports whether r is an RFC 5322 atext character.
  // If dot is true, period is included.
  func isAtext(r rune, dot bool) bool {
  	switch r {
  	case '.':
  		return dot
  
  	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
  		return false
  	}
  	return isVchar(r)
  }
  
  // isQtext reports whether r is an RFC 5322 qtext character.
  func isQtext(r rune) bool {
  	// Printable US-ASCII, excluding backslash or quote.
  	if r == '\\' || r == '"' {
  		return false
  	}
  	return isVchar(r)
  }
  
  // quoteString renders a string as an RFC 5322 quoted-string.
  func quoteString(s string) string {
  	var buf bytes.Buffer
  	buf.WriteByte('"')
  	for _, r := range s {
  		if isQtext(r) || isWSP(r) {
  			buf.WriteRune(r)
  		} else if isVchar(r) {
  			buf.WriteByte('\\')
  			buf.WriteRune(r)
  		}
  	}
  	buf.WriteByte('"')
  	return buf.String()
  }
  
  // isVchar reports whether r is an RFC 5322 VCHAR character.
  func isVchar(r rune) bool {
  	// Visible (printing) characters.
  	return '!' <= r && r <= '~' || isMultibyte(r)
  }
  
  // isMultibyte reports whether r is a multi-byte UTF-8 character
  // as supported by RFC 6532
  func isMultibyte(r rune) bool {
  	return r >= utf8.RuneSelf
  }
  
  // isWSP reports whether r is a WSP (white space).
  // WSP is a space or horizontal tab (RFC 5234 Appendix B).
  func isWSP(r rune) bool {
  	return r == ' ' || r == '\t'
  }
  

View as plain text