The Go Programming Language

Source file src/pkg/net/textproto/reader.go

     1	// Copyright 2010 The Go Authors.  All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package textproto
     6	
     7	import (
     8		"bufio"
     9		"bytes"
    10		"io"
    11		"io/ioutil"
    12		"os"
    13		"strconv"
    14	)
    15	
    16	// BUG(rsc): To let callers manage exposure to denial of service
    17	// attacks, Reader should allow them to set and reset a limit on
    18	// the number of bytes read from the connection.
    19	
    20	// A Reader implements convenience methods for reading requests
    21	// or responses from a text protocol network connection.
    22	type Reader struct {
    23		R   *bufio.Reader
    24		dot *dotReader
    25	}
    26	
    27	// NewReader returns a new Reader reading from r.
    28	func NewReader(r *bufio.Reader) *Reader {
    29		return &Reader{R: r}
    30	}
    31	
    32	// ReadLine reads a single line from r,
    33	// eliding the final \n or \r\n from the returned string.
    34	func (r *Reader) ReadLine() (string, os.Error) {
    35		line, err := r.readLineSlice()
    36		return string(line), err
    37	}
    38	
    39	// ReadLineBytes is like ReadLine but returns a []byte instead of a string.
    40	func (r *Reader) ReadLineBytes() ([]byte, os.Error) {
    41		line, err := r.readLineSlice()
    42		if line != nil {
    43			buf := make([]byte, len(line))
    44			copy(buf, line)
    45			line = buf
    46		}
    47		return line, err
    48	}
    49	
    50	func (r *Reader) readLineSlice() ([]byte, os.Error) {
    51		r.closeDot()
    52		line, _, err := r.R.ReadLine()
    53		return line, err
    54	}
    55	
    56	// ReadContinuedLine reads a possibly continued line from r,
    57	// eliding the final trailing ASCII white space.
    58	// Lines after the first are considered continuations if they
    59	// begin with a space or tab character.  In the returned data,
    60	// continuation lines are separated from the previous line
    61	// only by a single space: the newline and leading white space
    62	// are removed.
    63	//
    64	// For example, consider this input:
    65	//
    66	//	Line 1
    67	//	  continued...
    68	//	Line 2
    69	//
    70	// The first call to ReadContinuedLine will return "Line 1 continued..."
    71	// and the second will return "Line 2".
    72	//
    73	// A line consisting of only white space is never continued.
    74	//
    75	func (r *Reader) ReadContinuedLine() (string, os.Error) {
    76		line, err := r.readContinuedLineSlice()
    77		return string(line), err
    78	}
    79	
    80	// trim returns s with leading and trailing spaces and tabs removed.
    81	// It does not assume Unicode or UTF-8.
    82	func trim(s []byte) []byte {
    83		i := 0
    84		for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
    85			i++
    86		}
    87		n := len(s)
    88		for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
    89			n--
    90		}
    91		return s[i:n]
    92	}
    93	
    94	// ReadContinuedLineBytes is like ReadContinuedLine but
    95	// returns a []byte instead of a string.
    96	func (r *Reader) ReadContinuedLineBytes() ([]byte, os.Error) {
    97		line, err := r.readContinuedLineSlice()
    98		if line != nil {
    99			buf := make([]byte, len(line))
   100			copy(buf, line)
   101			line = buf
   102		}
   103		return line, err
   104	}
   105	
   106	func (r *Reader) readContinuedLineSlice() ([]byte, os.Error) {
   107		// Read the first line.
   108		line, err := r.readLineSlice()
   109		if err != nil {
   110			return line, err
   111		}
   112		if len(line) == 0 { // blank line - no continuation
   113			return line, nil
   114		}
   115		line = trim(line)
   116	
   117		copied := false
   118		if r.R.Buffered() < 1 {
   119			// ReadByte will flush the buffer; make a copy of the slice.
   120			copied = true
   121			line = append([]byte(nil), line...)
   122		}
   123	
   124		// Look for a continuation line.
   125		c, err := r.R.ReadByte()
   126		if err != nil {
   127			// Delay err until we read the byte next time.
   128			return line, nil
   129		}
   130		if c != ' ' && c != '\t' {
   131			// Not a continuation.
   132			r.R.UnreadByte()
   133			return line, nil
   134		}
   135	
   136		if !copied {
   137			// The next readLineSlice will invalidate the previous one.
   138			line = append(make([]byte, 0, len(line)*2), line...)
   139		}
   140	
   141		// Read continuation lines.
   142		for {
   143			// Consume leading spaces; one already gone.
   144			for {
   145				c, err = r.R.ReadByte()
   146				if err != nil {
   147					break
   148				}
   149				if c != ' ' && c != '\t' {
   150					r.R.UnreadByte()
   151					break
   152				}
   153			}
   154			var cont []byte
   155			cont, err = r.readLineSlice()
   156			cont = trim(cont)
   157			line = append(line, ' ')
   158			line = append(line, cont...)
   159			if err != nil {
   160				break
   161			}
   162	
   163			// Check for leading space on next line.
   164			if c, err = r.R.ReadByte(); err != nil {
   165				break
   166			}
   167			if c != ' ' && c != '\t' {
   168				r.R.UnreadByte()
   169				break
   170			}
   171		}
   172	
   173		// Delay error until next call.
   174		if len(line) > 0 {
   175			err = nil
   176		}
   177		return line, err
   178	}
   179	
   180	func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err os.Error) {
   181		line, err := r.ReadLine()
   182		if err != nil {
   183			return
   184		}
   185		if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   186			err = ProtocolError("short response: " + line)
   187			return
   188		}
   189		continued = line[3] == '-'
   190		code, err = strconv.Atoi(line[0:3])
   191		if err != nil || code < 100 {
   192			err = ProtocolError("invalid response code: " + line)
   193			return
   194		}
   195		message = line[4:]
   196		if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   197			10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   198			100 <= expectCode && expectCode < 1000 && code != expectCode {
   199			err = &Error{code, message}
   200		}
   201		return
   202	}
   203	
   204	// ReadCodeLine reads a response code line of the form
   205	//	code message
   206	// where code is a 3-digit status code and the message
   207	// extends to the rest of the line.  An example of such a line is:
   208	//	220 plan9.bell-labs.com ESMTP
   209	//
   210	// If the prefix of the status does not match the digits in expectCode,
   211	// ReadCodeLine returns with err set to &Error{code, message}.
   212	// For example, if expectCode is 31, an error will be returned if
   213	// the status is not in the range [310,319].
   214	//
   215	// If the response is multi-line, ReadCodeLine returns an error.
   216	//
   217	// An expectCode <= 0 disables the check of the status code.
   218	//
   219	func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err os.Error) {
   220		code, continued, message, err := r.readCodeLine(expectCode)
   221		if err == nil && continued {
   222			err = ProtocolError("unexpected multi-line response: " + message)
   223		}
   224		return
   225	}
   226	
   227	// ReadResponse reads a multi-line response of the form
   228	//	code-message line 1
   229	//	code-message line 2
   230	//	...
   231	//	code message line n
   232	// where code is a 3-digit status code. Each line should have the same code.
   233	// The response is terminated by a line that uses a space between the code and
   234	// the message line rather than a dash. Each line in message is separated by
   235	// a newline (\n).
   236	//
   237	// If the prefix of the status does not match the digits in expectCode,
   238	// ReadResponse returns with err set to &Error{code, message}.
   239	// For example, if expectCode is 31, an error will be returned if
   240	// the status is not in the range [310,319].
   241	//
   242	// An expectCode <= 0 disables the check of the status code.
   243	//
   244	func (r *Reader) ReadResponse(expectCode int) (code int, message string, err os.Error) {
   245		code, continued, message, err := r.readCodeLine(expectCode)
   246		for err == nil && continued {
   247			var code2 int
   248			var moreMessage string
   249			code2, continued, moreMessage, err = r.readCodeLine(expectCode)
   250			if code != code2 {
   251				err = ProtocolError("status code mismatch: " + strconv.Itoa(code) + ", " + strconv.Itoa(code2))
   252			}
   253			message += "\n" + moreMessage
   254		}
   255		return
   256	}
   257	
   258	// DotReader returns a new Reader that satisfies Reads using the
   259	// decoded text of a dot-encoded block read from r.
   260	// The returned Reader is only valid until the next call
   261	// to a method on r.
   262	//
   263	// Dot encoding is a common framing used for data blocks
   264	// in text protocols such as SMTP.  The data consists of a sequence
   265	// of lines, each of which ends in "\r\n".  The sequence itself
   266	// ends at a line containing just a dot: ".\r\n".  Lines beginning
   267	// with a dot are escaped with an additional dot to avoid
   268	// looking like the end of the sequence.
   269	//
   270	// The decoded form returned by the Reader's Read method
   271	// rewrites the "\r\n" line endings into the simpler "\n",
   272	// removes leading dot escapes if present, and stops with error os.EOF
   273	// after consuming (and discarding) the end-of-sequence line.
   274	func (r *Reader) DotReader() io.Reader {
   275		r.closeDot()
   276		r.dot = &dotReader{r: r}
   277		return r.dot
   278	}
   279	
   280	type dotReader struct {
   281		r     *Reader
   282		state int
   283	}
   284	
   285	// Read satisfies reads by decoding dot-encoded data read from d.r.
   286	func (d *dotReader) Read(b []byte) (n int, err os.Error) {
   287		// Run data through a simple state machine to
   288		// elide leading dots, rewrite trailing \r\n into \n,
   289		// and detect ending .\r\n line.
   290		const (
   291			stateBeginLine = iota // beginning of line; initial state; must be zero
   292			stateDot              // read . at beginning of line
   293			stateDotCR            // read .\r at beginning of line
   294			stateCR               // read \r (possibly at end of line)
   295			stateData             // reading data in middle of line
   296			stateEOF              // reached .\r\n end marker line
   297		)
   298		br := d.r.R
   299		for n < len(b) && d.state != stateEOF {
   300			var c byte
   301			c, err = br.ReadByte()
   302			if err != nil {
   303				if err == os.EOF {
   304					err = io.ErrUnexpectedEOF
   305				}
   306				break
   307			}
   308			switch d.state {
   309			case stateBeginLine:
   310				if c == '.' {
   311					d.state = stateDot
   312					continue
   313				}
   314				if c == '\r' {
   315					d.state = stateCR
   316					continue
   317				}
   318				d.state = stateData
   319	
   320			case stateDot:
   321				if c == '\r' {
   322					d.state = stateDotCR
   323					continue
   324				}
   325				if c == '\n' {
   326					d.state = stateEOF
   327					continue
   328				}
   329				d.state = stateData
   330	
   331			case stateDotCR:
   332				if c == '\n' {
   333					d.state = stateEOF
   334					continue
   335				}
   336				// Not part of .\r\n.
   337				// Consume leading dot and emit saved \r.
   338				br.UnreadByte()
   339				c = '\r'
   340				d.state = stateData
   341	
   342			case stateCR:
   343				if c == '\n' {
   344					d.state = stateBeginLine
   345					break
   346				}
   347				// Not part of \r\n.  Emit saved \r
   348				br.UnreadByte()
   349				c = '\r'
   350				d.state = stateData
   351	
   352			case stateData:
   353				if c == '\r' {
   354					d.state = stateCR
   355					continue
   356				}
   357				if c == '\n' {
   358					d.state = stateBeginLine
   359				}
   360			}
   361			b[n] = c
   362			n++
   363		}
   364		if err == nil && d.state == stateEOF {
   365			err = os.EOF
   366		}
   367		if err != nil && d.r.dot == d {
   368			d.r.dot = nil
   369		}
   370		return
   371	}
   372	
   373	// closeDot drains the current DotReader if any,
   374	// making sure that it reads until the ending dot line.
   375	func (r *Reader) closeDot() {
   376		if r.dot == nil {
   377			return
   378		}
   379		buf := make([]byte, 128)
   380		for r.dot != nil {
   381			// When Read reaches EOF or an error,
   382			// it will set r.dot == nil.
   383			r.dot.Read(buf)
   384		}
   385	}
   386	
   387	// ReadDotBytes reads a dot-encoding and returns the decoded data.
   388	//
   389	// See the documentation for the DotReader method for details about dot-encoding.
   390	func (r *Reader) ReadDotBytes() ([]byte, os.Error) {
   391		return ioutil.ReadAll(r.DotReader())
   392	}
   393	
   394	// ReadDotLines reads a dot-encoding and returns a slice
   395	// containing the decoded lines, with the final \r\n or \n elided from each.
   396	//
   397	// See the documentation for the DotReader method for details about dot-encoding.
   398	func (r *Reader) ReadDotLines() ([]string, os.Error) {
   399		// We could use ReadDotBytes and then Split it,
   400		// but reading a line at a time avoids needing a
   401		// large contiguous block of memory and is simpler.
   402		var v []string
   403		var err os.Error
   404		for {
   405			var line string
   406			line, err = r.ReadLine()
   407			if err != nil {
   408				if err == os.EOF {
   409					err = io.ErrUnexpectedEOF
   410				}
   411				break
   412			}
   413	
   414			// Dot by itself marks end; otherwise cut one dot.
   415			if len(line) > 0 && line[0] == '.' {
   416				if len(line) == 1 {
   417					break
   418				}
   419				line = line[1:]
   420			}
   421			v = append(v, line)
   422		}
   423		return v, err
   424	}
   425	
   426	// ReadMIMEHeader reads a MIME-style header from r.
   427	// The header is a sequence of possibly continued Key: Value lines
   428	// ending in a blank line.
   429	// The returned map m maps CanonicalMIMEHeaderKey(key) to a
   430	// sequence of values in the same order encountered in the input.
   431	//
   432	// For example, consider this input:
   433	//
   434	//	My-Key: Value 1
   435	//	Long-Key: Even
   436	//	       Longer Value
   437	//	My-Key: Value 2
   438	//
   439	// Given that input, ReadMIMEHeader returns the map:
   440	//
   441	//	map[string][]string{
   442	//		"My-Key": {"Value 1", "Value 2"},
   443	//		"Long-Key": {"Even Longer Value"},
   444	//	}
   445	//
   446	func (r *Reader) ReadMIMEHeader() (MIMEHeader, os.Error) {
   447		m := make(MIMEHeader)
   448		for {
   449			kv, err := r.readContinuedLineSlice()
   450			if len(kv) == 0 {
   451				return m, err
   452			}
   453	
   454			// Key ends at first colon; must not have spaces.
   455			i := bytes.IndexByte(kv, ':')
   456			if i < 0 || bytes.IndexByte(kv[0:i], ' ') >= 0 {
   457				return m, ProtocolError("malformed MIME header line: " + string(kv))
   458			}
   459			key := CanonicalMIMEHeaderKey(string(kv[0:i]))
   460	
   461			// Skip initial spaces in value.
   462			i++ // skip colon
   463			for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
   464				i++
   465			}
   466			value := string(kv[i:])
   467	
   468			m[key] = append(m[key], value)
   469	
   470			if err != nil {
   471				return m, err
   472			}
   473		}
   474		panic("unreachable")
   475	}
   476	
   477	// CanonicalMIMEHeaderKey returns the canonical format of the
   478	// MIME header key s.  The canonicalization converts the first
   479	// letter and any letter following a hyphen to upper case;
   480	// the rest are converted to lowercase.  For example, the
   481	// canonical key for "accept-encoding" is "Accept-Encoding".
   482	func CanonicalMIMEHeaderKey(s string) string {
   483		// Quick check for canonical encoding.
   484		needUpper := true
   485		for i := 0; i < len(s); i++ {
   486			c := s[i]
   487			if needUpper && 'a' <= c && c <= 'z' {
   488				goto MustRewrite
   489			}
   490			if !needUpper && 'A' <= c && c <= 'Z' {
   491				goto MustRewrite
   492			}
   493			needUpper = c == '-'
   494		}
   495		return s
   496	
   497	MustRewrite:
   498		// Canonicalize: first letter upper case
   499		// and upper case after each dash.
   500		// (Host, User-Agent, If-Modified-Since).
   501		// MIME headers are ASCII only, so no Unicode issues.
   502		a := []byte(s)
   503		upper := true
   504		for i, v := range a {
   505			if upper && 'a' <= v && v <= 'z' {
   506				a[i] = v + 'A' - 'a'
   507			}
   508			if !upper && 'A' <= v && v <= 'Z' {
   509				a[i] = v + 'a' - 'A'
   510			}
   511			upper = v == '-'
   512		}
   513		return string(a)
   514	}

release.r60.3. Except as noted, this content is licensed under a Creative Commons Attribution 3.0 License.