multipart.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  //
     5  
     6  /*
     7  Package multipart implements MIME multipart parsing, as defined in RFC
     8  2046.
     9  
    10  The implementation is sufficient for HTTP (RFC 2388) and the multipart
    11  bodies generated by popular browsers.
    12  
    13  # Limits
    14  
    15  To protect against malicious inputs, this package sets limits on the size
    16  of the MIME data it processes.
    17  
    18  Reader.NextPart and Reader.NextRawPart limit the number of headers in a
    19  part to 10000 and Reader.ReadForm limits the total number of headers in all
    20  FileHeaders to 10000.
    21  These limits may be adjusted with the GODEBUG=multipartmaxheaders=<value>
    22  setting.
    23  
    24  Reader.ReadForm further limits the number of parts in a form to 1000.
    25  This limit may be adjusted with the GODEBUG=multipartmaxparts=<value>
    26  setting.
    27  */
    28  package multipart
    29  
    30  import (
    31  	"bufio"
    32  	"bytes"
    33  	"fmt"
    34  	"internal/godebug"
    35  	"io"
    36  	"mime"
    37  	"mime/quotedprintable"
    38  	"net/textproto"
    39  	"path/filepath"
    40  	"strconv"
    41  	"strings"
    42  )
    43  
    44  var emptyParams = make(map[string]string)
    45  
    46  // This constant needs to be at least 76 for this package to work correctly.
    47  // This is because \r\n--separator_of_len_70- would fill the buffer and it
    48  // wouldn't be safe to consume a single byte from it.
    49  const peekBufferSize = 4096
    50  
    51  // A Part represents a single part in a multipart body.
    52  type Part struct {
    53  	// The headers of the body, if any, with the keys canonicalized
    54  	// in the same fashion that the Go http.Request headers are.
    55  	// For example, "foo-bar" changes case to "Foo-Bar"
    56  	Header textproto.MIMEHeader
    57  
    58  	mr *Reader
    59  
    60  	disposition       string
    61  	dispositionParams map[string]string
    62  
    63  	// r is either a reader directly reading from mr, or it's a
    64  	// wrapper around such a reader, decoding the
    65  	// Content-Transfer-Encoding
    66  	r io.Reader
    67  
    68  	n       int   // known data bytes waiting in mr.bufReader
    69  	total   int64 // total data bytes read already
    70  	err     error // error to return when n == 0
    71  	readErr error // read error observed from mr.bufReader
    72  }
    73  
    74  // FormName returns the name parameter if p has a Content-Disposition
    75  // of type "form-data".  Otherwise it returns the empty string.
    76  func (p *Part) FormName() string {
    77  	// See https://tools.ietf.org/html/rfc2183 section 2 for EBNF
    78  	// of Content-Disposition value format.
    79  	if p.dispositionParams == nil {
    80  		p.parseContentDisposition()
    81  	}
    82  	if p.disposition != "form-data" {
    83  		return ""
    84  	}
    85  	return p.dispositionParams["name"]
    86  }
    87  
    88  // FileName returns the filename parameter of the Part's Content-Disposition
    89  // header. If not empty, the filename is passed through filepath.Base (which is
    90  // platform dependent) before being returned.
    91  func (p *Part) FileName() string {
    92  	if p.dispositionParams == nil {
    93  		p.parseContentDisposition()
    94  	}
    95  	filename := p.dispositionParams["filename"]
    96  	if filename == "" {
    97  		return ""
    98  	}
    99  	// RFC 7578, Section 4.2 requires that if a filename is provided, the
   100  	// directory path information must not be used.
   101  	return filepath.Base(filename)
   102  }
   103  
   104  func (p *Part) parseContentDisposition() {
   105  	v := p.Header.Get("Content-Disposition")
   106  	var err error
   107  	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
   108  	if err != nil {
   109  		p.dispositionParams = emptyParams
   110  	}
   111  }
   112  
   113  // NewReader creates a new multipart Reader reading from r using the
   114  // given MIME boundary.
   115  //
   116  // The boundary is usually obtained from the "boundary" parameter of
   117  // the message's "Content-Type" header. Use mime.ParseMediaType to
   118  // parse such headers.
   119  func NewReader(r io.Reader, boundary string) *Reader {
   120  	b := []byte("\r\n--" + boundary + "--")
   121  	return &Reader{
   122  		bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize),
   123  		nl:               b[:2],
   124  		nlDashBoundary:   b[:len(b)-2],
   125  		dashBoundaryDash: b[2:],
   126  		dashBoundary:     b[2 : len(b)-2],
   127  	}
   128  }
   129  
   130  // stickyErrorReader is an io.Reader which never calls Read on its
   131  // underlying Reader once an error has been seen. (the io.Reader
   132  // interface's contract promises nothing about the return values of
   133  // Read calls after an error, yet this package does do multiple Reads
   134  // after error)
   135  type stickyErrorReader struct {
   136  	r   io.Reader
   137  	err error
   138  }
   139  
   140  func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
   141  	if r.err != nil {
   142  		return 0, r.err
   143  	}
   144  	n, r.err = r.r.Read(p)
   145  	return n, r.err
   146  }
   147  
   148  func newPart(mr *Reader, rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) {
   149  	bp := &Part{
   150  		Header: make(map[string][]string),
   151  		mr:     mr,
   152  	}
   153  	if err := bp.populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders); err != nil {
   154  		return nil, err
   155  	}
   156  	bp.r = partReader{bp}
   157  
   158  	// rawPart is used to switch between Part.NextPart and Part.NextRawPart.
   159  	if !rawPart {
   160  		const cte = "Content-Transfer-Encoding"
   161  		if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") {
   162  			bp.Header.Del(cte)
   163  			bp.r = quotedprintable.NewReader(bp.r)
   164  		}
   165  	}
   166  	return bp, nil
   167  }
   168  
   169  func (p *Part) populateHeaders(maxMIMEHeaderSize, maxMIMEHeaders int64) error {
   170  	r := textproto.NewReader(p.mr.bufReader)
   171  	header, err := readMIMEHeader(r, maxMIMEHeaderSize, maxMIMEHeaders)
   172  	if err == nil {
   173  		p.Header = header
   174  	}
   175  	// TODO: Add a distinguishable error to net/textproto.
   176  	if err != nil && err.Error() == "message too large" {
   177  		err = ErrMessageTooLarge
   178  	}
   179  	return err
   180  }
   181  
   182  // Read reads the body of a part, after its headers and before the
   183  // next part (if any) begins.
   184  func (p *Part) Read(d []byte) (n int, err error) {
   185  	return p.r.Read(d)
   186  }
   187  
   188  // partReader implements io.Reader by reading raw bytes directly from the
   189  // wrapped *Part, without doing any Transfer-Encoding decoding.
   190  type partReader struct {
   191  	p *Part
   192  }
   193  
   194  func (pr partReader) Read(d []byte) (int, error) {
   195  	p := pr.p
   196  	br := p.mr.bufReader
   197  
   198  	// Read into buffer until we identify some data to return,
   199  	// or we find a reason to stop (boundary or read error).
   200  	for p.n == 0 && p.err == nil {
   201  		peek, _ := br.Peek(br.Buffered())
   202  		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
   203  		if p.n == 0 && p.err == nil {
   204  			// Force buffered I/O to read more into buffer.
   205  			_, p.readErr = br.Peek(len(peek) + 1)
   206  			if p.readErr == io.EOF {
   207  				p.readErr = io.ErrUnexpectedEOF
   208  			}
   209  		}
   210  	}
   211  
   212  	// Read out from "data to return" part of buffer.
   213  	if p.n == 0 {
   214  		return 0, p.err
   215  	}
   216  	n := len(d)
   217  	if n > p.n {
   218  		n = p.n
   219  	}
   220  	n, _ = br.Read(d[:n])
   221  	p.total += int64(n)
   222  	p.n -= n
   223  	if p.n == 0 {
   224  		return n, p.err
   225  	}
   226  	return n, nil
   227  }
   228  
   229  // scanUntilBoundary scans buf to identify how much of it can be safely
   230  // returned as part of the Part body.
   231  // dashBoundary is "--boundary".
   232  // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
   233  // The comments below (and the name) assume "\n--boundary", but either is accepted.
   234  // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
   235  // readErr is the read error, if any, that followed reading the bytes in buf.
   236  // scanUntilBoundary returns the number of data bytes from buf that can be
   237  // returned as part of the Part body and also the error to return (if any)
   238  // once those data bytes are done.
   239  func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
   240  	if total == 0 {
   241  		// At beginning of body, allow dashBoundary.
   242  		if bytes.HasPrefix(buf, dashBoundary) {
   243  			switch matchAfterPrefix(buf, dashBoundary, readErr) {
   244  			case -1:
   245  				return len(dashBoundary), nil
   246  			case 0:
   247  				return 0, nil
   248  			case +1:
   249  				return 0, io.EOF
   250  			}
   251  		}
   252  		if bytes.HasPrefix(dashBoundary, buf) {
   253  			return 0, readErr
   254  		}
   255  	}
   256  
   257  	// Search for "\n--boundary".
   258  	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
   259  		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
   260  		case -1:
   261  			return i + len(nlDashBoundary), nil
   262  		case 0:
   263  			return i, nil
   264  		case +1:
   265  			return i, io.EOF
   266  		}
   267  	}
   268  	if bytes.HasPrefix(nlDashBoundary, buf) {
   269  		return 0, readErr
   270  	}
   271  
   272  	// Otherwise, anything up to the final \n is not part of the boundary
   273  	// and so must be part of the body.
   274  	// Also if the section from the final \n onward is not a prefix of the boundary,
   275  	// it too must be part of the body.
   276  	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
   277  	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
   278  		return i, nil
   279  	}
   280  	return len(buf), readErr
   281  }
   282  
   283  // matchAfterPrefix checks whether buf should be considered to match the boundary.
   284  // The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
   285  // and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
   286  //
   287  // matchAfterPrefix returns +1 if the buffer does match the boundary,
   288  // meaning the prefix is followed by a double dash, space, tab, cr, nl,
   289  // or end of input.
   290  // It returns -1 if the buffer definitely does NOT match the boundary,
   291  // meaning the prefix is followed by some other character.
   292  // For example, "--foobar" does not match "--foo".
   293  // It returns 0 more input needs to be read to make the decision,
   294  // meaning that len(buf) == len(prefix) and readErr == nil.
   295  func matchAfterPrefix(buf, prefix []byte, readErr error) int {
   296  	if len(buf) == len(prefix) {
   297  		if readErr != nil {
   298  			return +1
   299  		}
   300  		return 0
   301  	}
   302  	c := buf[len(prefix)]
   303  
   304  	if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
   305  		return +1
   306  	}
   307  
   308  	// Try to detect boundaryDash
   309  	if c == '-' {
   310  		if len(buf) == len(prefix)+1 {
   311  			if readErr != nil {
   312  				// Prefix + "-" does not match
   313  				return -1
   314  			}
   315  			return 0
   316  		}
   317  		if buf[len(prefix)+1] == '-' {
   318  			return +1
   319  		}
   320  	}
   321  
   322  	return -1
   323  }
   324  
   325  func (p *Part) Close() error {
   326  	io.Copy(io.Discard, p)
   327  	return nil
   328  }
   329  
   330  // Reader is an iterator over parts in a MIME multipart body.
   331  // Reader's underlying parser consumes its input as needed. Seeking
   332  // isn't supported.
   333  type Reader struct {
   334  	bufReader *bufio.Reader
   335  	tempDir   string // used in tests
   336  
   337  	currentPart *Part
   338  	partsRead   int
   339  
   340  	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
   341  	nlDashBoundary   []byte // nl + "--boundary"
   342  	dashBoundaryDash []byte // "--boundary--"
   343  	dashBoundary     []byte // "--boundary"
   344  }
   345  
   346  // maxMIMEHeaderSize is the maximum size of a MIME header we will parse,
   347  // including header keys, values, and map overhead.
   348  const maxMIMEHeaderSize = 10 << 20
   349  
   350  // multipartMaxHeaders is the maximum number of header entries NextPart will return,
   351  // as well as the maximum combined total of header entries Reader.ReadForm will return
   352  // in FileHeaders.
   353  var multipartMaxHeaders = godebug.New("multipartmaxheaders")
   354  
   355  func maxMIMEHeaders() int64 {
   356  	if s := multipartMaxHeaders.Value(); s != "" {
   357  		if v, err := strconv.ParseInt(s, 10, 64); err == nil && v >= 0 {
   358  			multipartMaxHeaders.IncNonDefault()
   359  			return v
   360  		}
   361  	}
   362  	return 10000
   363  }
   364  
   365  // NextPart returns the next part in the multipart or an error.
   366  // When there are no more parts, the error io.EOF is returned.
   367  //
   368  // As a special case, if the "Content-Transfer-Encoding" header
   369  // has a value of "quoted-printable", that header is instead
   370  // hidden and the body is transparently decoded during Read calls.
   371  func (r *Reader) NextPart() (*Part, error) {
   372  	return r.nextPart(false, maxMIMEHeaderSize, maxMIMEHeaders())
   373  }
   374  
   375  // NextRawPart returns the next part in the multipart or an error.
   376  // When there are no more parts, the error io.EOF is returned.
   377  //
   378  // Unlike NextPart, it does not have special handling for
   379  // "Content-Transfer-Encoding: quoted-printable".
   380  func (r *Reader) NextRawPart() (*Part, error) {
   381  	return r.nextPart(true, maxMIMEHeaderSize, maxMIMEHeaders())
   382  }
   383  
   384  func (r *Reader) nextPart(rawPart bool, maxMIMEHeaderSize, maxMIMEHeaders int64) (*Part, error) {
   385  	if r.currentPart != nil {
   386  		r.currentPart.Close()
   387  	}
   388  	if string(r.dashBoundary) == "--" {
   389  		return nil, fmt.Errorf("multipart: boundary is empty")
   390  	}
   391  	expectNewPart := false
   392  	for {
   393  		line, err := r.bufReader.ReadSlice('\n')
   394  
   395  		if err == io.EOF && r.isFinalBoundary(line) {
   396  			// If the buffer ends in "--boundary--" without the
   397  			// trailing "\r\n", ReadSlice will return an error
   398  			// (since it's missing the '\n'), but this is a valid
   399  			// multipart EOF so we need to return io.EOF instead of
   400  			// a fmt-wrapped one.
   401  			return nil, io.EOF
   402  		}
   403  		if err != nil {
   404  			return nil, fmt.Errorf("multipart: NextPart: %w", err)
   405  		}
   406  
   407  		if r.isBoundaryDelimiterLine(line) {
   408  			r.partsRead++
   409  			bp, err := newPart(r, rawPart, maxMIMEHeaderSize, maxMIMEHeaders)
   410  			if err != nil {
   411  				return nil, err
   412  			}
   413  			r.currentPart = bp
   414  			return bp, nil
   415  		}
   416  
   417  		if r.isFinalBoundary(line) {
   418  			// Expected EOF
   419  			return nil, io.EOF
   420  		}
   421  
   422  		if expectNewPart {
   423  			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
   424  		}
   425  
   426  		if r.partsRead == 0 {
   427  			// skip line
   428  			continue
   429  		}
   430  
   431  		// Consume the "\n" or "\r\n" separator between the
   432  		// body of the previous part and the boundary line we
   433  		// now expect will follow. (either a new part or the
   434  		// end boundary)
   435  		if bytes.Equal(line, r.nl) {
   436  			expectNewPart = true
   437  			continue
   438  		}
   439  
   440  		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
   441  	}
   442  }
   443  
   444  // isFinalBoundary reports whether line is the final boundary line
   445  // indicating that all parts are over.
   446  // It matches `^--boundary--[ \t]*(\r\n)?$`
   447  func (r *Reader) isFinalBoundary(line []byte) bool {
   448  	if !bytes.HasPrefix(line, r.dashBoundaryDash) {
   449  		return false
   450  	}
   451  	rest := line[len(r.dashBoundaryDash):]
   452  	rest = skipLWSPChar(rest)
   453  	return len(rest) == 0 || bytes.Equal(rest, r.nl)
   454  }
   455  
   456  func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
   457  	// https://tools.ietf.org/html/rfc2046#section-5.1
   458  	//   The boundary delimiter line is then defined as a line
   459  	//   consisting entirely of two hyphen characters ("-",
   460  	//   decimal value 45) followed by the boundary parameter
   461  	//   value from the Content-Type header field, optional linear
   462  	//   whitespace, and a terminating CRLF.
   463  	if !bytes.HasPrefix(line, r.dashBoundary) {
   464  		return false
   465  	}
   466  	rest := line[len(r.dashBoundary):]
   467  	rest = skipLWSPChar(rest)
   468  
   469  	// On the first part, see our lines are ending in \n instead of \r\n
   470  	// and switch into that mode if so. This is a violation of the spec,
   471  	// but occurs in practice.
   472  	if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
   473  		r.nl = r.nl[1:]
   474  		r.nlDashBoundary = r.nlDashBoundary[1:]
   475  	}
   476  	return bytes.Equal(rest, r.nl)
   477  }
   478  
   479  // skipLWSPChar returns b with leading spaces and tabs removed.
   480  // RFC 822 defines:
   481  //
   482  //	LWSP-char = SPACE / HTAB
   483  func skipLWSPChar(b []byte) []byte {
   484  	for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
   485  		b = b[1:]
   486  	}
   487  	return b
   488  }
   489
View as plain text