Source file src/mime/quotedprintable/reader.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package quotedprintable implements quoted-printable encoding as specified by
     6  // RFC 2045.
     7  package quotedprintable
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"fmt"
    13  	"io"
    14  )
    15  
// Reader is a quoted-printable decoder.
//
// It decodes one input line at a time: Read pulls a line from br into line,
// hands out decoded bytes from it, and only then goes back to br for more.
type Reader struct {
	// br buffers the encoded input so that it can be read line by line.
	br   *bufio.Reader
	rerr error  // last read error
	line []byte // to be consumed before more of br
}
    22  
    23  // NewReader returns a quoted-printable reader, decoding from r.
    24  func NewReader(r io.Reader) *Reader {
    25  	return &Reader{
    26  		br: bufio.NewReader(r),
    27  	}
    28  }
    29  
    30  func fromHex(b byte) (byte, error) {
    31  	switch {
    32  	case b >= '0' && b <= '9':
    33  		return b - '0', nil
    34  	case b >= 'A' && b <= 'F':
    35  		return b - 'A' + 10, nil
    36  	// Accept badly encoded bytes.
    37  	case b >= 'a' && b <= 'f':
    38  		return b - 'a' + 10, nil
    39  	}
    40  	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
    41  }
    42  
    43  func readHexByte(v []byte) (b byte, err error) {
    44  	if len(v) < 2 {
    45  		return 0, io.ErrUnexpectedEOF
    46  	}
    47  	var hb, lb byte
    48  	if hb, err = fromHex(v[0]); err != nil {
    49  		return 0, err
    50  	}
    51  	if lb, err = fromHex(v[1]); err != nil {
    52  		return 0, err
    53  	}
    54  	return hb<<4 | lb, nil
    55  }
    56  
    57  func isQPDiscardWhitespace(r rune) bool {
    58  	switch r {
    59  	case '\n', '\r', ' ', '\t':
    60  		return true
    61  	}
    62  	return false
    63  }
    64  
    65  var (
    66  	crlf       = []byte("\r\n")
    67  	lf         = []byte("\n")
    68  	softSuffix = []byte("=")
    69  )
    70  
// Read reads and decodes quoted-printable data from the underlying reader.
//
// Input is processed one line at a time and the decoded bytes are stored in
// p. Read returns the number of bytes written to p. An error from the
// underlying reader (including io.EOF) is held back until the line that was
// read along with it has been fully decoded. Malformed input is reported as
// an error together with the count of bytes decoded before it.
func (r *Reader) Read(p []byte) (n int, err error) {
	// Deviations from RFC 2045:
	// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
	// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
	//    with other broken QP encoders & decoders.
	// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
	//    the final byte read from the underlying reader is allowed to be '=',
	//    and it will be silently ignored.
	// 4. it takes = as literal = if not followed by two hex digits
	//    but not at end of line (issue 13219).
	for len(p) > 0 {
		if len(r.line) == 0 {
			// The buffered line is used up. Surface a held-back error, if
			// any, before asking br for more input.
			if r.rerr != nil {
				return n, r.rerr
			}
			// The error is stored rather than returned so that the bytes
			// that came back with it are decoded first.
			r.line, r.rerr = r.br.ReadSlice('\n')

			// Does the line end in CRLF instead of just LF?
			hasLF := bytes.HasSuffix(r.line, lf)
			hasCR := bytes.HasSuffix(r.line, crlf)
			wholeLine := r.line
			// Drop trailing spaces, tabs, CRs and LFs. A hard line ending
			// is appended again below.
			r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
			if bytes.HasSuffix(r.line, softSuffix) {
				// Soft line break: remove the trailing '=' and emit no line
				// ending. rightStripped is whatever followed the '='.
				rightStripped := wholeLine[len(r.line):]
				r.line = r.line[:len(r.line)-1]
				// What followed the '=' must begin with LF or CRLF. The one
				// exception is a line with content before the '=' where
				// nothing followed it and the read ended in io.EOF
				// (deviation 3). Otherwise record an error, which is
				// reported once the rest of this line has been decoded.
				if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
					!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
					r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
				}
			} else if hasLF {
				// Hard line break: put back the line ending that was
				// trimmed, keeping CRLF or LF as it appeared in the input.
				if hasCR {
					r.line = append(r.line, '\r', '\n')
				} else {
					r.line = append(r.line, '\n')
				}
			}
			// Start over from the top of the loop: the line may still be
			// empty, or an error may now be pending.
			continue
		}
		b := r.line[0]

		switch {
		case b == '=':
			// Escape sequence: '=' followed by two hex digits.
			b, err = readHexByte(r.line[1:])
			if err != nil {
				if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
					// Take the = as a literal =.
					// The break leaves the switch, so only the '=' itself
					// is consumed below; the bytes after it are handled on
					// later iterations.
					b = '='
					break
				}
				return n, err
			}
			r.line = r.line[2:] // 2 of the 3; other 1 is done below
		case b == '\t' || b == '\r' || b == '\n':
			// Tab, CR and LF pass through unchanged.
			break
		case b >= 0x80:
			// As an extension to RFC 2045, we accept
			// values >= 0x80 without complaint. Issue 22597.
			break
		case b < ' ' || b > '~':
			// The remaining control bytes and DEL (0x7f) are rejected.
			return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
		}
		// Emit the decoded byte and advance past its source byte.
		p[0] = b
		p = p[1:]
		r.line = r.line[1:]
		n++
	}
	return n, nil
}
   140  

View as plain text