reader.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textproto
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  )
    18  
// errMessageTooLarge is the error returned in this file when a line,
// a continued line, or a MIME header exceeds the size or count limit
// supplied by the caller.
//
// TODO: This should be a distinguishable error (ErrMessageTooLarge)
// to allow mime/multipart to detect it.
var errMessageTooLarge = errors.New("message too large")
    22  
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	R   *bufio.Reader // buffered source that every method reads from
	dot *dotReader    // DotReader currently in progress, if any; drained by closeDot
	buf []byte        // a re-usable buffer for readContinuedLineSlice
}
    30  
    31  // NewReader returns a new [Reader] reading from r.
    32  //
    33  // To avoid denial of service attacks, the provided [bufio.Reader]
    34  // should be reading from an [io.LimitReader] or similar Reader to bound
    35  // the size of responses.
    36  func NewReader(r *bufio.Reader) *Reader {
    37  	return &Reader{R: r}
    38  }
    39  
    40  // ReadLine reads a single line from r,
    41  // eliding the final \n or \r\n from the returned string.
    42  func (r *Reader) ReadLine() (string, error) {
    43  	line, err := r.readLineSlice(-1)
    44  	return string(line), err
    45  }
    46  
    47  // ReadLineBytes is like [Reader.ReadLine] but returns a []byte instead of a string.
    48  func (r *Reader) ReadLineBytes() ([]byte, error) {
    49  	line, err := r.readLineSlice(-1)
    50  	if line != nil {
    51  		line = bytes.Clone(line)
    52  	}
    53  	return line, err
    54  }
    55  
    56  // readLineSlice reads a single line from r,
    57  // up to lim bytes long (or unlimited if lim is less than 0),
    58  // eliding the final \r or \r\n from the returned string.
    59  func (r *Reader) readLineSlice(lim int64) ([]byte, error) {
    60  	r.closeDot()
    61  	var line []byte
    62  	for {
    63  		l, more, err := r.R.ReadLine()
    64  		if err != nil {
    65  			return nil, err
    66  		}
    67  		if lim >= 0 && int64(len(line))+int64(len(l)) > lim {
    68  			return nil, errMessageTooLarge
    69  		}
    70  		// Avoid the copy if the first call produced a full line.
    71  		if line == nil && !more {
    72  			return l, nil
    73  		}
    74  		line = append(line, l...)
    75  		if !more {
    76  			break
    77  		}
    78  	}
    79  	return line, nil
    80  }
    81  
    82  // ReadContinuedLine reads a possibly continued line from r,
    83  // eliding the final trailing ASCII white space.
    84  // Lines after the first are considered continuations if they
    85  // begin with a space or tab character. In the returned data,
    86  // continuation lines are separated from the previous line
    87  // only by a single space: the newline and leading white space
    88  // are removed.
    89  //
    90  // For example, consider this input:
    91  //
    92  //	Line 1
    93  //	  continued...
    94  //	Line 2
    95  //
    96  // The first call to ReadContinuedLine will return "Line 1 continued..."
    97  // and the second will return "Line 2".
    98  //
    99  // Empty lines are never continued.
   100  func (r *Reader) ReadContinuedLine() (string, error) {
   101  	line, err := r.readContinuedLineSlice(-1, noValidation)
   102  	return string(line), err
   103  }
   104  
   105  // trim returns s with leading and trailing spaces and tabs removed.
   106  // It does not assume Unicode or UTF-8.
   107  func trim(s []byte) []byte {
   108  	i := 0
   109  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   110  		i++
   111  	}
   112  	n := len(s)
   113  	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
   114  		n--
   115  	}
   116  	return s[i:n]
   117  }
   118  
   119  // ReadContinuedLineBytes is like [Reader.ReadContinuedLine] but
   120  // returns a []byte instead of a string.
   121  func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
   122  	line, err := r.readContinuedLineSlice(-1, noValidation)
   123  	if line != nil {
   124  		line = bytes.Clone(line)
   125  	}
   126  	return line, err
   127  }
   128  
   129  // readContinuedLineSlice reads continued lines from the reader buffer,
   130  // returning a byte slice with all lines. The validateFirstLine function
   131  // is run on the first read line, and if it returns an error then this
   132  // error is returned from readContinuedLineSlice.
   133  // It reads up to lim bytes of data (or unlimited if lim is less than 0).
   134  func (r *Reader) readContinuedLineSlice(lim int64, validateFirstLine func([]byte) error) ([]byte, error) {
   135  	if validateFirstLine == nil {
   136  		return nil, fmt.Errorf("missing validateFirstLine func")
   137  	}
   138  
   139  	// Read the first line.
   140  	line, err := r.readLineSlice(lim)
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  	if len(line) == 0 { // blank line - no continuation
   145  		return line, nil
   146  	}
   147  
   148  	if err := validateFirstLine(line); err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	// Optimistically assume that we have started to buffer the next line
   153  	// and it starts with an ASCII letter (the next header key), or a blank
   154  	// line, so we can avoid copying that buffered data around in memory
   155  	// and skipping over non-existent whitespace.
   156  	if r.R.Buffered() > 1 {
   157  		peek, _ := r.R.Peek(2)
   158  		if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') ||
   159  			len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' {
   160  			return trim(line), nil
   161  		}
   162  	}
   163  
   164  	// ReadByte or the next readLineSlice will flush the read buffer;
   165  	// copy the slice into buf.
   166  	r.buf = append(r.buf[:0], trim(line)...)
   167  
   168  	if lim < 0 {
   169  		lim = math.MaxInt64
   170  	}
   171  	lim -= int64(len(r.buf))
   172  
   173  	// Read continuation lines.
   174  	for r.skipSpace() > 0 {
   175  		r.buf = append(r.buf, ' ')
   176  		if int64(len(r.buf)) >= lim {
   177  			return nil, errMessageTooLarge
   178  		}
   179  		line, err := r.readLineSlice(lim - int64(len(r.buf)))
   180  		if err != nil {
   181  			break
   182  		}
   183  		r.buf = append(r.buf, trim(line)...)
   184  	}
   185  	return r.buf, nil
   186  }
   187  
   188  // skipSpace skips R over all spaces and returns the number of bytes skipped.
   189  func (r *Reader) skipSpace() int {
   190  	n := 0
   191  	for {
   192  		c, err := r.R.ReadByte()
   193  		if err != nil {
   194  			// Bufio will keep err until next read.
   195  			break
   196  		}
   197  		if c != ' ' && c != '\t' {
   198  			r.R.UnreadByte()
   199  			break
   200  		}
   201  		n++
   202  	}
   203  	return n
   204  }
   205  
   206  func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
   207  	line, err := r.ReadLine()
   208  	if err != nil {
   209  		return
   210  	}
   211  	return parseCodeLine(line, expectCode)
   212  }
   213  
   214  func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
   215  	if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   216  		err = ProtocolError("short response: " + line)
   217  		return
   218  	}
   219  	continued = line[3] == '-'
   220  	code, err = strconv.Atoi(line[0:3])
   221  	if err != nil || code < 100 {
   222  		err = ProtocolError("invalid response code: " + line)
   223  		return
   224  	}
   225  	message = line[4:]
   226  	if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   227  		10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   228  		100 <= expectCode && expectCode < 1000 && code != expectCode {
   229  		err = &Error{code, message}
   230  	}
   231  	return
   232  }
   233  
   234  // ReadCodeLine reads a response code line of the form
   235  //
   236  //	code message
   237  //
   238  // where code is a three-digit status code and the message
   239  // extends to the rest of the line. An example of such a line is:
   240  //
   241  //	220 plan9.bell-labs.com ESMTP
   242  //
   243  // If the prefix of the status does not match the digits in expectCode,
   244  // ReadCodeLine returns with err set to &Error{code, message}.
   245  // For example, if expectCode is 31, an error will be returned if
   246  // the status is not in the range [310,319].
   247  //
   248  // If the response is multi-line, ReadCodeLine returns an error.
   249  //
   250  // An expectCode <= 0 disables the check of the status code.
   251  func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
   252  	code, continued, message, err := r.readCodeLine(expectCode)
   253  	if err == nil && continued {
   254  		err = ProtocolError("unexpected multi-line response: " + message)
   255  	}
   256  	return
   257  }
   258  
   259  // ReadResponse reads a multi-line response of the form:
   260  //
   261  //	code-message line 1
   262  //	code-message line 2
   263  //	...
   264  //	code message line n
   265  //
   266  // where code is a three-digit status code. The first line starts with the
   267  // code and a hyphen. The response is terminated by a line that starts
   268  // with the same code followed by a space. Each line in message is
   269  // separated by a newline (\n).
   270  //
   271  // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
   272  // details of another form of response accepted:
   273  //
   274  //	code-message line 1
   275  //	message line 2
   276  //	...
   277  //	code message line n
   278  //
   279  // If the prefix of the status does not match the digits in expectCode,
   280  // ReadResponse returns with err set to &Error{code, message}.
   281  // For example, if expectCode is 31, an error will be returned if
   282  // the status is not in the range [310,319].
   283  //
   284  // An expectCode <= 0 disables the check of the status code.
   285  func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
   286  	code, continued, message, err := r.readCodeLine(expectCode)
   287  	multi := continued
   288  	for continued {
   289  		line, err := r.ReadLine()
   290  		if err != nil {
   291  			return 0, "", err
   292  		}
   293  
   294  		var code2 int
   295  		var moreMessage string
   296  		code2, continued, moreMessage, err = parseCodeLine(line, 0)
   297  		if err != nil || code2 != code {
   298  			message += "\n" + strings.TrimRight(line, "\r\n")
   299  			continued = true
   300  			continue
   301  		}
   302  		message += "\n" + moreMessage
   303  	}
   304  	if err != nil && multi && message != "" {
   305  		// replace one line error message with all lines (full message)
   306  		err = &Error{code, message}
   307  	}
   308  	return
   309  }
   310  
   311  // DotReader returns a new [Reader] that satisfies Reads using the
   312  // decoded text of a dot-encoded block read from r.
   313  // The returned Reader is only valid until the next call
   314  // to a method on r.
   315  //
   316  // Dot encoding is a common framing used for data blocks
   317  // in text protocols such as SMTP.  The data consists of a sequence
   318  // of lines, each of which ends in "\r\n".  The sequence itself
   319  // ends at a line containing just a dot: ".\r\n".  Lines beginning
   320  // with a dot are escaped with an additional dot to avoid
   321  // looking like the end of the sequence.
   322  //
   323  // The decoded form returned by the Reader's Read method
   324  // rewrites the "\r\n" line endings into the simpler "\n",
   325  // removes leading dot escapes if present, and stops with error [io.EOF]
   326  // after consuming (and discarding) the end-of-sequence line.
   327  func (r *Reader) DotReader() io.Reader {
   328  	r.closeDot()
   329  	r.dot = &dotReader{r: r}
   330  	return r.dot
   331  }
   332  
// dotReader is the io.Reader handed out by Reader.DotReader.
// It decodes a dot-encoded block read from r.
type dotReader struct {
	r     *Reader // Reader whose R supplies the encoded bytes
	state int     // current decoder state; the zero value is the beginning-of-line state used by Read
}
   337  
   338  // Read satisfies reads by decoding dot-encoded data read from d.r.
   339  func (d *dotReader) Read(b []byte) (n int, err error) {
   340  	// Run data through a simple state machine to
   341  	// elide leading dots, rewrite trailing \r\n into \n,
   342  	// and detect ending .\r\n line.
   343  	const (
   344  		stateBeginLine = iota // beginning of line; initial state; must be zero
   345  		stateDot              // read . at beginning of line
   346  		stateDotCR            // read .\r at beginning of line
   347  		stateCR               // read \r (possibly at end of line)
   348  		stateData             // reading data in middle of line
   349  		stateEOF              // reached .\r\n end marker line
   350  	)
   351  	br := d.r.R
   352  	for n < len(b) && d.state != stateEOF {
   353  		var c byte
   354  		c, err = br.ReadByte()
   355  		if err != nil {
   356  			if err == io.EOF {
   357  				err = io.ErrUnexpectedEOF
   358  			}
   359  			break
   360  		}
   361  		switch d.state {
   362  		case stateBeginLine:
   363  			if c == '.' {
   364  				d.state = stateDot
   365  				continue
   366  			}
   367  			if c == '\r' {
   368  				d.state = stateCR
   369  				continue
   370  			}
   371  			d.state = stateData
   372  
   373  		case stateDot:
   374  			if c == '\r' {
   375  				d.state = stateDotCR
   376  				continue
   377  			}
   378  			if c == '\n' {
   379  				d.state = stateEOF
   380  				continue
   381  			}
   382  			d.state = stateData
   383  
   384  		case stateDotCR:
   385  			if c == '\n' {
   386  				d.state = stateEOF
   387  				continue
   388  			}
   389  			// Not part of .\r\n.
   390  			// Consume leading dot and emit saved \r.
   391  			br.UnreadByte()
   392  			c = '\r'
   393  			d.state = stateData
   394  
   395  		case stateCR:
   396  			if c == '\n' {
   397  				d.state = stateBeginLine
   398  				break
   399  			}
   400  			// Not part of \r\n. Emit saved \r
   401  			br.UnreadByte()
   402  			c = '\r'
   403  			d.state = stateData
   404  
   405  		case stateData:
   406  			if c == '\r' {
   407  				d.state = stateCR
   408  				continue
   409  			}
   410  			if c == '\n' {
   411  				d.state = stateBeginLine
   412  			}
   413  		}
   414  		b[n] = c
   415  		n++
   416  	}
   417  	if err == nil && d.state == stateEOF {
   418  		err = io.EOF
   419  	}
   420  	if err != nil && d.r.dot == d {
   421  		d.r.dot = nil
   422  	}
   423  	return
   424  }
   425  
   426  // closeDot drains the current DotReader if any,
   427  // making sure that it reads until the ending dot line.
   428  func (r *Reader) closeDot() {
   429  	if r.dot == nil {
   430  		return
   431  	}
   432  	buf := make([]byte, 128)
   433  	for r.dot != nil {
   434  		// When Read reaches EOF or an error,
   435  		// it will set r.dot == nil.
   436  		r.dot.Read(buf)
   437  	}
   438  }
   439  
   440  // ReadDotBytes reads a dot-encoding and returns the decoded data.
   441  //
   442  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   443  func (r *Reader) ReadDotBytes() ([]byte, error) {
   444  	return io.ReadAll(r.DotReader())
   445  }
   446  
   447  // ReadDotLines reads a dot-encoding and returns a slice
   448  // containing the decoded lines, with the final \r\n or \n elided from each.
   449  //
   450  // See the documentation for the [Reader.DotReader] method for details about dot-encoding.
   451  func (r *Reader) ReadDotLines() ([]string, error) {
   452  	// We could use ReadDotBytes and then Split it,
   453  	// but reading a line at a time avoids needing a
   454  	// large contiguous block of memory and is simpler.
   455  	var v []string
   456  	var err error
   457  	for {
   458  		var line string
   459  		line, err = r.ReadLine()
   460  		if err != nil {
   461  			if err == io.EOF {
   462  				err = io.ErrUnexpectedEOF
   463  			}
   464  			break
   465  		}
   466  
   467  		// Dot by itself marks end; otherwise cut one dot.
   468  		if len(line) > 0 && line[0] == '.' {
   469  			if len(line) == 1 {
   470  				break
   471  			}
   472  			line = line[1:]
   473  		}
   474  		v = append(v, line)
   475  	}
   476  	return v, err
   477  }
   478  
   479  var colon = []byte(":")
   480  
   481  // ReadMIMEHeader reads a MIME-style header from r.
   482  // The header is a sequence of possibly continued Key: Value lines
   483  // ending in a blank line.
   484  // The returned map m maps [CanonicalMIMEHeaderKey](key) to a
   485  // sequence of values in the same order encountered in the input.
   486  //
   487  // For example, consider this input:
   488  //
   489  //	My-Key: Value 1
   490  //	Long-Key: Even
   491  //	       Longer Value
   492  //	My-Key: Value 2
   493  //
   494  // Given that input, ReadMIMEHeader returns the map:
   495  //
   496  //	map[string][]string{
   497  //		"My-Key": {"Value 1", "Value 2"},
   498  //		"Long-Key": {"Even Longer Value"},
   499  //	}
   500  func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
   501  	return readMIMEHeader(r, math.MaxInt64, math.MaxInt64)
   502  }
   503  
   504  // readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size.
   505  // It is called by the mime/multipart package.
   506  func readMIMEHeader(r *Reader, maxMemory, maxHeaders int64) (MIMEHeader, error) {
   507  	// Avoid lots of small slice allocations later by allocating one
   508  	// large one ahead of time which we'll cut up into smaller
   509  	// slices. If this isn't big enough later, we allocate small ones.
   510  	var strs []string
   511  	hint := r.upcomingHeaderKeys()
   512  	if hint > 0 {
   513  		if hint > 1000 {
   514  			hint = 1000 // set a cap to avoid overallocation
   515  		}
   516  		strs = make([]string, hint)
   517  	}
   518  
   519  	m := make(MIMEHeader, hint)
   520  
   521  	// Account for 400 bytes of overhead for the MIMEHeader, plus 200 bytes per entry.
   522  	// Benchmarking map creation as of go1.20, a one-entry MIMEHeader is 416 bytes and large
   523  	// MIMEHeaders average about 200 bytes per entry.
   524  	maxMemory -= 400
   525  	const mapEntryOverhead = 200
   526  
   527  	// The first line cannot start with a leading space.
   528  	if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
   529  		const errorLimit = 80 // arbitrary limit on how much of the line we'll quote
   530  		line, err := r.readLineSlice(errorLimit)
   531  		if err != nil {
   532  			return m, err
   533  		}
   534  		return m, ProtocolError("malformed MIME header initial line: " + string(line))
   535  	}
   536  
   537  	for {
   538  		kv, err := r.readContinuedLineSlice(maxMemory, mustHaveFieldNameColon)
   539  		if len(kv) == 0 {
   540  			return m, err
   541  		}
   542  
   543  		// Key ends at first colon.
   544  		k, v, ok := bytes.Cut(kv, colon)
   545  		if !ok {
   546  			return m, ProtocolError("malformed MIME header line: " + string(kv))
   547  		}
   548  		key, ok := canonicalMIMEHeaderKey(k)
   549  		if !ok {
   550  			return m, ProtocolError("malformed MIME header line: " + string(kv))
   551  		}
   552  		for _, c := range v {
   553  			if !validHeaderValueByte(c) {
   554  				return m, ProtocolError("malformed MIME header line: " + string(kv))
   555  			}
   556  		}
   557  
   558  		// As per RFC 7230 field-name is a token, tokens consist of one or more chars.
   559  		// We could return a ProtocolError here, but better to be liberal in what we
   560  		// accept, so if we get an empty key, skip it.
   561  		if key == "" {
   562  			continue
   563  		}
   564  
   565  		maxHeaders--
   566  		if maxHeaders < 0 {
   567  			return nil, errMessageTooLarge
   568  		}
   569  
   570  		// Skip initial spaces in value.
   571  		value := string(bytes.TrimLeft(v, " \t"))
   572  
   573  		vv := m[key]
   574  		if vv == nil {
   575  			maxMemory -= int64(len(key))
   576  			maxMemory -= mapEntryOverhead
   577  		}
   578  		maxMemory -= int64(len(value))
   579  		if maxMemory < 0 {
   580  			return m, errMessageTooLarge
   581  		}
   582  		if vv == nil && len(strs) > 0 {
   583  			// More than likely this will be a single-element key.
   584  			// Most headers aren't multi-valued.
   585  			// Set the capacity on strs[0] to 1, so any future append
   586  			// won't extend the slice into the other strings.
   587  			vv, strs = strs[:1:1], strs[1:]
   588  			vv[0] = value
   589  			m[key] = vv
   590  		} else {
   591  			m[key] = append(vv, value)
   592  		}
   593  
   594  		if err != nil {
   595  			return m, err
   596  		}
   597  	}
   598  }
   599  
   600  // noValidation is a no-op validation func for readContinuedLineSlice
   601  // that permits any lines.
   602  func noValidation(_ []byte) error { return nil }
   603  
   604  // mustHaveFieldNameColon ensures that, per RFC 7230, the
   605  // field-name is on a single line, so the first line must
   606  // contain a colon.
   607  func mustHaveFieldNameColon(line []byte) error {
   608  	if bytes.IndexByte(line, ':') < 0 {
   609  		return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line))
   610  	}
   611  	return nil
   612  }
   613  
   614  var nl = []byte("\n")
   615  
   616  // upcomingHeaderKeys returns an approximation of the number of keys
   617  // that will be in this header. If it gets confused, it returns 0.
   618  func (r *Reader) upcomingHeaderKeys() (n int) {
   619  	// Try to determine the 'hint' size.
   620  	r.R.Peek(1) // force a buffer load if empty
   621  	s := r.R.Buffered()
   622  	if s == 0 {
   623  		return
   624  	}
   625  	peek, _ := r.R.Peek(s)
   626  	for len(peek) > 0 && n < 1000 {
   627  		var line []byte
   628  		line, peek, _ = bytes.Cut(peek, nl)
   629  		if len(line) == 0 || (len(line) == 1 && line[0] == '\r') {
   630  			// Blank line separating headers from the body.
   631  			break
   632  		}
   633  		if line[0] == ' ' || line[0] == '\t' {
   634  			// Folded continuation of the previous line.
   635  			continue
   636  		}
   637  		n++
   638  	}
   639  	return n
   640  }
   641  
   642  // CanonicalMIMEHeaderKey returns the canonical format of the
   643  // MIME header key s. The canonicalization converts the first
   644  // letter and any letter following a hyphen to upper case;
   645  // the rest are converted to lowercase. For example, the
   646  // canonical key for "accept-encoding" is "Accept-Encoding".
   647  // MIME header keys are assumed to be ASCII only.
   648  // If s contains a space or invalid header field bytes, it is
   649  // returned without modifications.
   650  func CanonicalMIMEHeaderKey(s string) string {
   651  	// Quick check for canonical encoding.
   652  	upper := true
   653  	for i := 0; i < len(s); i++ {
   654  		c := s[i]
   655  		if !validHeaderFieldByte(c) {
   656  			return s
   657  		}
   658  		if upper && 'a' <= c && c <= 'z' {
   659  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   660  			return s
   661  		}
   662  		if !upper && 'A' <= c && c <= 'Z' {
   663  			s, _ = canonicalMIMEHeaderKey([]byte(s))
   664  			return s
   665  		}
   666  		upper = c == '-'
   667  	}
   668  	return s
   669  }
   670  
// toLower is the offset between an ASCII upper-case letter and its
// lower-case counterpart: adding it to 'A'..'Z' gives 'a'..'z', and
// subtracting it from 'a'..'z' gives 'A'..'Z'.
const toLower = 'a' - 'A'
   672  
   673  // validHeaderFieldByte reports whether c is a valid byte in a header
   674  // field name. RFC 7230 says:
   675  //
   676  //	header-field   = field-name ":" OWS field-value OWS
   677  //	field-name     = token
   678  //	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   679  //	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   680  //	token = 1*tchar
   681  func validHeaderFieldByte(c byte) bool {
   682  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   683  	// so that the byte c can be tested with a shift and an and.
   684  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
   685  	// and this function will return false.
   686  	const mask = 0 |
   687  		(1<<(10)-1)<<'0' |
   688  		(1<<(26)-1)<<'a' |
   689  		(1<<(26)-1)<<'A' |
   690  		1<<'!' |
   691  		1<<'#' |
   692  		1<<'$' |
   693  		1<<'%' |
   694  		1<<'&' |
   695  		1<<'\'' |
   696  		1<<'*' |
   697  		1<<'+' |
   698  		1<<'-' |
   699  		1<<'.' |
   700  		1<<'^' |
   701  		1<<'_' |
   702  		1<<'`' |
   703  		1<<'|' |
   704  		1<<'~'
   705  	return ((uint64(1)<<c)&(mask&(1<<64-1)) |
   706  		(uint64(1)<<(c-64))&(mask>>64)) != 0
   707  }
   708  
   709  // validHeaderValueByte reports whether c is a valid byte in a header
   710  // field value. RFC 7230 says:
   711  //
   712  //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
   713  //	field-vchar    = VCHAR / obs-text
   714  //	obs-text       = %x80-FF
   715  //
   716  // RFC 5234 says:
   717  //
   718  //	HTAB           =  %x09
   719  //	SP             =  %x20
   720  //	VCHAR          =  %x21-7E
   721  func validHeaderValueByte(c byte) bool {
   722  	// mask is a 128-bit bitmap with 1s for allowed bytes,
   723  	// so that the byte c can be tested with a shift and an and.
   724  	// If c >= 128, then 1<<c and 1<<(c-64) will both be zero.
   725  	// Since this is the obs-text range, we invert the mask to
   726  	// create a bitmap with 1s for disallowed bytes.
   727  	const mask = 0 |
   728  		(1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E
   729  		1<<0x20 | // SP: %x20
   730  		1<<0x09 // HTAB: %x09
   731  	return ((uint64(1)<<c)&^(mask&(1<<64-1)) |
   732  		(uint64(1)<<(c-64))&^(mask>>64)) == 0
   733  }
   734  
   735  // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
   736  // allowed to mutate the provided byte slice before returning the
   737  // string.
   738  //
   739  // For invalid inputs (if a contains spaces or non-token bytes), a
   740  // is unchanged and a string copy is returned.
   741  //
   742  // ok is true if the header key contains only valid characters and spaces.
   743  // ReadMIMEHeader accepts header keys containing spaces, but does not
   744  // canonicalize them.
   745  func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) {
   746  	// See if a looks like a header key. If not, return it unchanged.
   747  	noCanon := false
   748  	for _, c := range a {
   749  		if validHeaderFieldByte(c) {
   750  			continue
   751  		}
   752  		// Don't canonicalize.
   753  		if c == ' ' {
   754  			// We accept invalid headers with a space before the
   755  			// colon, but must not canonicalize them.
   756  			// See https://go.dev/issue/34540.
   757  			noCanon = true
   758  			continue
   759  		}
   760  		return string(a), false
   761  	}
   762  	if noCanon {
   763  		return string(a), true
   764  	}
   765  
   766  	upper := true
   767  	for i, c := range a {
   768  		// Canonicalize: first letter upper case
   769  		// and upper case after each dash.
   770  		// (Host, User-Agent, If-Modified-Since).
   771  		// MIME headers are ASCII only, so no Unicode issues.
   772  		if upper && 'a' <= c && c <= 'z' {
   773  			c -= toLower
   774  		} else if !upper && 'A' <= c && c <= 'Z' {
   775  			c += toLower
   776  		}
   777  		a[i] = c
   778  		upper = c == '-' // for next time
   779  	}
   780  	commonHeaderOnce.Do(initCommonHeader)
   781  	// The compiler recognizes m[string(byteSlice)] as a special
   782  	// case, so a copy of a's bytes into a new string does not
   783  	// happen in this map lookup:
   784  	if v := commonHeader[string(a)]; v != "" {
   785  		return v, true
   786  	}
   787  	return string(a), true
   788  }
   789  
   790  // commonHeader interns common header strings.
   791  var commonHeader map[string]string
   792  
   793  var commonHeaderOnce sync.Once
   794  
   795  func initCommonHeader() {
   796  	commonHeader = make(map[string]string)
   797  	for _, v := range []string{
   798  		"Accept",
   799  		"Accept-Charset",
   800  		"Accept-Encoding",
   801  		"Accept-Language",
   802  		"Accept-Ranges",
   803  		"Cache-Control",
   804  		"Cc",
   805  		"Connection",
   806  		"Content-Id",
   807  		"Content-Language",
   808  		"Content-Length",
   809  		"Content-Transfer-Encoding",
   810  		"Content-Type",
   811  		"Cookie",
   812  		"Date",
   813  		"Dkim-Signature",
   814  		"Etag",
   815  		"Expires",
   816  		"From",
   817  		"Host",
   818  		"If-Modified-Since",
   819  		"If-None-Match",
   820  		"In-Reply-To",
   821  		"Last-Modified",
   822  		"Location",
   823  		"Message-Id",
   824  		"Mime-Version",
   825  		"Pragma",
   826  		"Received",
   827  		"Return-Path",
   828  		"Server",
   829  		"Set-Cookie",
   830  		"Subject",
   831  		"To",
   832  		"User-Agent",
   833  		"Via",
   834  		"X-Forwarded-For",
   835  		"X-Imforwards",
   836  		"X-Powered-By",
   837  	} {
   838  		commonHeader[v] = v
   839  	}
   840  }
   841
View as plain text