// Go Home Page
// The Go Programming Language
//
// Source file src/pkg/http/request.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// HTTP Request reading and parsing.

// The http package implements parsing of HTTP requests, replies,
// and URLs and provides an extensible HTTP server and a basic
// HTTP client.
package http

import (
    "bufio"
    "bytes"
    "container/vector"
    "fmt"
    "io"
    "io/ioutil"
    "mime"
    "mime/multipart"
    "os"
    "strconv"
    "strings"
)

// Limits applied while parsing requests.
const (
    // readLineBytes reports ErrLineTooLong for a line of this many
    // bytes or more.
    maxLineLength  = 4096 // assumed <= bufio.defaultBufSize
    // readKeyValue rejects a header value that reaches this length
    // after a continuation line has been appended.
    maxValueLength = 4096
    // ReadRequest reports ErrHeaderTooLong once this many header
    // lines have been read.
    maxHeaderLines = 1024
    // Not referenced in this part of the file; presumably the buffer
    // size used by the chunked transfer code -- TODO confirm.
    chunkSize      = 4 << 10 // 4 KB chunks
)

// ProtocolError is the error type used for HTTP parsing errors.
// It embeds os.ErrorString, so the message text is the error value itself.
type ProtocolError struct {
    os.ErrorString
}

// Predefined protocol errors. Each is a distinct *ProtocolError, so
// callers can compare a returned error against these by identity.
var (
    ErrLineTooLong          = &ProtocolError{"header line too long"}
    ErrHeaderTooLong        = &ProtocolError{"header too long"}
    ErrShortBody            = &ProtocolError{"entity body too short"}
    ErrNotSupported         = &ProtocolError{"feature not supported"}
    ErrUnexpectedTrailer    = &ProtocolError{"trailer header without chunked transfer encoding"}
    ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"}
    ErrNotMultipart         = &ProtocolError{"request Content-Type isn't multipart/form-data"}
    ErrMissingBoundary      = &ProtocolError{"no multipart boundary param Content-Type"}
)

// badStringError pairs a short description of a parse failure with the
// input text that caused it.
type badStringError struct {
    what string // description of the problem
    str  string // offending text
}

// String returns the description followed by a space and the offending
// text in Go-quoted form.
func (bse *badStringError) String() string {
    quoted := fmt.Sprintf("%q", bse.str)
    return bse.what + " " + quoted
}

// reqExcludeHeader is the set of header keys that Request.Write passes to
// writeSortedKeyValue as the exclusion set when emitting req.Header.
// Write emits Host, User-Agent and Referer itself from the dedicated
// Request fields; the remaining keys are presumably emitted by the
// transfer writer -- TODO confirm against newTransferWriter.
var reqExcludeHeader = map[string]bool{
    "Host":              true,
    "User-Agent":        true,
    "Referer":           true,
    "Content-Length":    true,
    "Transfer-Encoding": true,
    "Trailer":           true,
}

// A Request represents a parsed HTTP request header.
type Request struct {
    Method     string // GET, POST, PUT, etc.
    RawURL     string // The raw URL given in the request.
    URL        *URL   // Parsed URL.
    Proto      string // "HTTP/1.0"
    ProtoMajor int    // 1
    ProtoMinor int    // 0

    // A header mapping request lines to their values.
    // If the header says
    //
    //	accept-encoding: gzip, deflate
    //	Accept-Language: en-us
    //	Connection: keep-alive
    //
    // then
    //
    //	Header = map[string]string{
    //		"Accept-Encoding": "gzip, deflate",
    //		"Accept-Language": "en-us",
    //		"Connection": "keep-alive",
    //	}
    //
    // HTTP defines that header names are case-insensitive.
    // The request parser implements this by canonicalizing the
    // name, making the first character and any characters
    // following a hyphen uppercase and the rest lowercase.
    //
    // When a key appears on several header lines, ReadRequest joins
    // the values with commas. ReadRequest also removes the Host,
    // Referer and User-Agent entries from this map after copying
    // them into the Host, Referer and UserAgent fields.
    Header map[string]string

    // The message body.
    Body io.ReadCloser

    // ContentLength records the length of the associated content.
    // The value -1 indicates that the length is unknown.
    // Values >= 0 indicate that the given number of bytes may be read from Body.
    ContentLength int64

    // TransferEncoding lists the transfer encodings from outermost to innermost.
    // An empty list denotes the "identity" encoding.
    TransferEncoding []string

    // Whether to close the connection after replying to this request.
    Close bool

    // The host on which the URL is sought.
    // Per RFC 2616, this is either the value of the Host: header
    // or the host name given in the URL itself.
    Host string

    // The referring URL, if sent in the request.
    //
    // Referer is misspelled as in the request itself,
    // a mistake from the earliest days of HTTP.
    // This value can also be fetched from the Header map
    // as Header["Referer"]; the benefit of making it
    // available as a structure field is that the compiler
    // can diagnose programs that use the alternate
    // (correct English) spelling req.Referrer but cannot
    // diagnose programs that use Header["Referrer"].
    Referer string

    // The User-Agent: header string, if sent in the request.
    UserAgent string

    // The parsed form. Only available after ParseForm is called.
    Form map[string][]string

    // Trailer maps trailer keys to values.  Like for Header, if the
    // request has multiple trailer lines with the same key, they will be
    // concatenated, delimited by commas.
    Trailer map[string]string
}

// ProtoAtLeast reports whether the request's HTTP protocol version
// is major.minor or newer.
func (r *Request) ProtoAtLeast(major, minor int) bool {
    // Different major versions decide the comparison on their own.
    if r.ProtoMajor != major {
        return r.ProtoMajor > major
    }
    // Same major version: the minor version decides.
    return r.ProtoMinor >= minor
}

// MultipartReader returns a MIME multipart reader over r.Body when the
// request's Content-Type is multipart/form-data with a boundary parameter.
// It returns ErrNotMultipart when the Content-Type header is absent or
// names another media type, and ErrMissingBoundary when the boundary
// parameter is absent.
func (r *Request) MultipartReader() (multipart.Reader, os.Error) {
    contentType, present := r.Header["Content-Type"]
    if !present {
        return nil, ErrNotMultipart
    }

    mediaType, params := mime.ParseMediaType(contentType)
    if mediaType != "multipart/form-data" {
        return nil, ErrNotMultipart
    }

    if boundary, found := params["boundary"]; found {
        return multipart.NewReader(r.Body, boundary), nil
    }
    return nil, ErrMissingBoundary
}

// valueOrDefault returns def when value is the empty string,
// and value itself otherwise.
func valueOrDefault(value, def string) string {
    if value == "" {
        return def
    }
    return value
}

// defaultUserAgent is the User-Agent header value Request.Write sends
// when the request's UserAgent field is empty.
const defaultUserAgent = "Go http package"

// Write writes an HTTP/1.1 request -- header and body -- in wire format.
// This method consults the following fields of req:
//	Host
//	RawURL, if non-empty, or else URL
//	Method (defaults to "GET")
//	UserAgent (defaults to defaultUserAgent)
//	Referer
//	Header
//	Body
//
// If Body is present, Write forces "Transfer-Encoding: chunked" as a header
// and then closes Body when finished sending it.
//
// Write stops at, and returns, the first error reported by w or by the
// transfer writer; part of the request may already have been written
// by then.
func (req *Request) Write(w io.Writer) os.Error {
    // Prefer the explicit Host field; fall back to the URL's host.
    host := req.Host
    if host == "" {
        host = req.URL.Host
    }

    // Prefer the raw URL as given; otherwise rebuild the request URI
    // from the parsed URL's path and query.
    uri := req.RawURL
    if uri == "" {
        uri = valueOrDefault(urlEscape(req.URL.Path, false), "/")
        if req.URL.RawQuery != "" {
            uri += "?" + req.URL.RawQuery
        }
    }

    // Request line
    if _, err := fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri); err != nil {
        return err
    }

    // Header lines
    if _, err := fmt.Fprintf(w, "Host: %s\r\n", host); err != nil {
        return err
    }
    if _, err := fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent)); err != nil {
        return err
    }
    if req.Referer != "" {
        if _, err := fmt.Fprintf(w, "Referer: %s\r\n", req.Referer); err != nil {
            return err
        }
    }

    // Process Body,ContentLength,Close,Trailer
    tw, err := newTransferWriter(req)
    if err != nil {
        return err
    }
    err = tw.WriteHeader(w)
    if err != nil {
        return err
    }

    // TODO: split long values?  (If so, should share code with Conn.Write)
    // TODO: if Header includes values for Host, User-Agent, or Referer, this
    // may conflict with the User-Agent or Referer headers we add manually.
    // One solution would be to remove the Host, UserAgent, and Referer fields
    // from Request, and introduce Request methods along the lines of
    // Response.{GetHeader,AddHeader} and string constants for "Host",
    // "User-Agent" and "Referer".
    err = writeSortedKeyValue(w, req.Header, reqExcludeHeader)
    if err != nil {
        return err
    }

    // Blank line terminating the header.
    if _, err = io.WriteString(w, "\r\n"); err != nil {
        return err
    }

    // Write body and trailer
    err = tw.WriteBody(w)
    if err != nil {
        return err
    }

    return nil
}

// readLineBytes reads one line (up to and including \n) from b and
// returns it with trailing spaces, tabs, \r and \n removed.
//
// It returns ErrLineTooLong when the line is maxLineLength bytes or
// longer, including the case where the line does not fit in b's buffer.
// Hitting EOF before a complete line is reported as
// io.ErrUnexpectedEOF. Other read errors are returned unchanged.
//
// The returned bytes are a pointer into storage in
// the bufio, so they are only valid until the next bufio read.
func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
    if p, err = b.ReadSlice('\n'); err != nil {
        // We always know when EOF is coming.
        // If the caller asked for a line, there should be a line.
        if err == os.EOF {
            err = io.ErrUnexpectedEOF
        } else if err == bufio.ErrBufferFull {
            // The buffer filled up before a newline was seen:
            // report it with the package's own error, not bufio's.
            err = ErrLineTooLong
        }
        return nil, err
    }
    if len(p) >= maxLineLength {
        return nil, ErrLineTooLong
    }

    // Chop off trailing white space.
    var i int
    for i = len(p); i > 0; i-- {
        if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
            break
        }
    }
    return p[0:i], nil
}

// readLine is readLineBytes with the result copied into a string, so the
// value stays valid after later reads from b.
func readLine(b *bufio.Reader) (s string, err os.Error) {
    var raw []byte
    if raw, err = readLineBytes(b); err != nil {
        return "", err
    }
    return string(raw), nil
}

// colon is the key/value separator that readKeyValue searches for in a
// header line.
var colon = []byte{':'}

// Read a key/value pair from b.
// A key/value has the form Key: Value\r\n
// and the Value can continue on multiple lines if each continuation line
// starts with a space.
//
// A blank line yields key == "" with a nil error; ReadRequest treats that
// as the end of the header. A line without a colon, or whose key contains
// a space, yields a *badStringError ("malformed header line"). Each
// continuation line is appended to the value after a single space, and a
// value that reaches maxValueLength this way yields a *badStringError
// ("value too long for key").
func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
    line, e := readLineBytes(b)
    if e != nil {
        return "", "", e
    }
    if len(line) == 0 {
        return "", "", nil
    }

    // Scan first line for colon.
    i := bytes.Index(line, colon)
    if i < 0 {
        goto Malformed
    }

    // line points into the bufio buffer, so key and value are copied to
    // strings before any further read from b.
    key = string(line[0:i])
    if strings.Index(key, " ") >= 0 {
        // Key field has space - no good.
        goto Malformed
    }

    // Skip initial space before value.
    for i++; i < len(line); i++ {
        if line[i] != ' ' {
            break
        }
    }
    value = string(line[i:])

    // Look for extension lines, which must begin with space.
    for {
        // Peek at the first byte of the next line. Only a space marks a
        // continuation; anything else is put back unless the read hit EOF.
        c, e := b.ReadByte()
        if c != ' ' {
            if e != os.EOF {
                b.UnreadByte()
            }
            break
        }

        // Eat leading space.
        for c == ' ' {
            if c, e = b.ReadByte(); e != nil {
                if e == os.EOF {
                    e = io.ErrUnexpectedEOF
                }
                return "", "", e
            }
        }
        // Put back the first non-space byte so it is read as part of the line.
        b.UnreadByte()

        // Read the rest of the line and add to value.
        if line, e = readLineBytes(b); e != nil {
            return "", "", e
        }
        value += " " + string(line)

        // The length limit is only checked here, after a continuation
        // line has been appended.
        if len(value) >= maxValueLength {
            return "", "", &badStringError{"value too long for key", key}
        }
    }
    return key, value, nil

Malformed:
    return "", "", &badStringError{"malformed header line", string(line)}
}

// atoi parses the run of decimal digits that starts at s[i].
// On success it returns the value, the index just past the last digit,
// and true. It returns (0, 0, false) when i is at or past the end of s,
// when s[i] is not a digit, or when the value exceeds 1000000.
func atoi(s string, i int) (n, i1 int, ok bool) {
    const Big = 1000000
    if i >= len(s) || s[i] < '0' || '9' < s[i] {
        return 0, 0, false
    }
    for i1 = i; i1 < len(s); i1++ {
        c := s[i1]
        if c < '0' || c > '9' {
            break
        }
        n = 10*n + int(c-'0')
        if n > Big {
            return 0, 0, false
        }
    }
    return n, i1, true
}

// parseHTTPVersion splits a version string of the form "HTTP/major.minor"
// into its two numbers: "HTTP/1.2" -> (1, 2, true).
// Anything else, including trailing text after the minor number,
// yields (0, 0, false).
func parseHTTPVersion(vers string) (int, int, bool) {
    const prefix = "HTTP/"
    if len(vers) < len(prefix) || vers[0:len(prefix)] != prefix {
        return 0, 0, false
    }

    // Major number, which must be followed by a dot.
    major, dot, ok := atoi(vers, len(prefix))
    if !ok || dot >= len(vers) || vers[dot] != '.' {
        return 0, 0, false
    }

    // Minor number, which must run to the end of the string.
    minor, end, ok := atoi(vers, dot+1)
    if !ok || end != len(vers) {
        return 0, 0, false
    }
    return major, minor, true
}

// cmap used to memoize CanonicalHeaderKey results. CanonicalHeaderKey no
// longer reads or writes it; the declaration is kept only so that any
// other reference to it keeps compiling.
var cmap = make(map[string]string)

// CanonicalHeaderKey returns the canonical format of the
// HTTP header key s.  The canonicalization converts the first
// letter and any letter following a hyphen to upper case;
// the rest are converted to lowercase.  For example, the
// canonical key for "accept-encoding" is "Accept-Encoding".
//
// The result is computed on every call and touches no shared state.
// An unsynchronized package-level cache would race when requests are
// parsed on several goroutines, and would grow without bound because
// header names are supplied by clients.
func CanonicalHeaderKey(s string) string {
    // canonicalize: first letter upper case
    // and upper case after each dash.
    // (Host, User-Agent, If-Modified-Since).
    // HTTP headers are ASCII only, so no Unicode issues.
    a := []byte(s)
    upper := true // whether the next letter should be upper case
    for i, v := range a {
        switch {
        case upper && 'a' <= v && v <= 'z':
            a[i] = v + 'A' - 'a'
        case !upper && 'A' <= v && v <= 'Z':
            a[i] = v + 'a' - 'A'
        }
        upper = v == '-'
    }
    return string(a)
}

// chunkedReader decodes a chunked body read from r. Its Read method
// returns chunk data only; beginChunk and Read consume the chunk-size
// lines, the CRLF after each chunk, and the lines after the final
// zero-size chunk.
type chunkedReader struct {
    r   *bufio.Reader
    n   uint64 // unread bytes in chunk
    err os.Error // first error seen; once set, Read keeps returning it
}

// newChunkedReader returns a chunkedReader that decodes from r.
// No data is read until the first call to Read.
func newChunkedReader(r *bufio.Reader) *chunkedReader {
    return &chunkedReader{r: r}
}

// beginChunk reads the next chunk-size line and stores the size,
// parsed as hexadecimal, in cr.n. A size of zero marks the last chunk:
// the following lines are consumed up to and including the first empty
// line, and cr.err is set to os.EOF. Any read or parse failure is left
// in cr.err.
func (cr *chunkedReader) beginChunk() {
    // chunk-size CRLF
    var sizeLine string
    if sizeLine, cr.err = readLine(cr.r); cr.err != nil {
        return
    }
    if cr.n, cr.err = strconv.Btoui64(sizeLine, 16); cr.err != nil {
        return
    }
    if cr.n != 0 {
        // Ordinary chunk: Read takes over from here.
        return
    }

    // trailer CRLF
    for {
        var trailerLine string
        if trailerLine, cr.err = readLine(cr.r); cr.err != nil {
            return
        }
        if trailerLine == "" {
            break
        }
    }
    cr.err = os.EOF
}

// Read copies decoded chunk data into b and returns the number of bytes
// copied. A single call never reads past the end of the current chunk.
// Once an error (including os.EOF after the last chunk) has been
// recorded in cr.err, every later call returns 0 and that error.
// A call may return n > 0 together with a non-nil error.
func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
    if cr.err != nil {
        return 0, cr.err
    }
    if cr.n == 0 {
        // Current chunk is used up (or none started yet): read the next
        // chunk-size line.
        cr.beginChunk()
        if cr.err != nil {
            return 0, cr.err
        }
    }
    // Clamp b so the read cannot run into the next chunk's framing.
    if uint64(len(b)) > cr.n {
        b = b[0:cr.n]
    }
    n, cr.err = cr.r.Read(b)
    cr.n -= uint64(n)
    if cr.n == 0 && cr.err == nil {
        // end of chunk (CRLF)
        // Note: this b is a new 2-byte scratch buffer that shadows the
        // parameter; the caller's data is not touched.
        b := make([]byte, 2)
        if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil {
            if b[0] != '\r' || b[1] != '\n' {
                cr.err = os.NewError("malformed chunked encoding")
            }
        }
    }
    return n, cr.err
}

// ReadRequest reads and parses a request from b.
//
// It parses the request line into Method, RawURL, Proto, ProtoMajor and
// ProtoMinor, parses RawURL into URL, and reads the header lines into
// Header with canonicalized keys, joining repeated keys with commas.
// Host, Referer and User-Agent are then moved out of Header into the
// Host, Referer and UserAgent fields. Finally it calls readTransfer to
// process the rest of the message.
//
// On any failure it returns a nil request and the error: a
// *badStringError for a malformed request line or version,
// ErrHeaderTooLong when maxHeaderLines header lines have been read, or
// whatever error readLine, ParseURL, readKeyValue or readTransfer reported.
func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
    req = new(Request)

    // First line: GET /index.html HTTP/1.0
    var s string
    if s, err = readLine(b); err != nil {
        return nil, err
    }

    // Split into at most three fields; fewer than three is malformed.
    var f []string
    if f = strings.Split(s, " ", 3); len(f) < 3 {
        return nil, &badStringError{"malformed HTTP request", s}
    }
    req.Method, req.RawURL, req.Proto = f[0], f[1], f[2]
    var ok bool
    if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
        return nil, &badStringError{"malformed HTTP version", req.Proto}
    }

    if req.URL, err = ParseURL(req.RawURL); err != nil {
        return nil, err
    }

    // Subsequent lines: Key: value.
    nheader := 0
    req.Header = make(map[string]string)
    for {
        var key, value string
        if key, value, err = readKeyValue(b); err != nil {
            return nil, err
        }
        // readKeyValue returns an empty key for the blank line that
        // ends the header.
        if key == "" {
            break
        }
        // The count is checked after incrementing, so the
        // maxHeaderLines-th header line is the one that is rejected.
        if nheader++; nheader >= maxHeaderLines {
            return nil, ErrHeaderTooLong
        }

        key = CanonicalHeaderKey(key)

        // RFC 2616 says that if you send the same header key
        // multiple times, it has to be semantically equivalent
        // to concatenating the values separated by commas.
        oldvalue, present := req.Header[key]
        if present {
            req.Header[key] = oldvalue + "," + value
        } else {
            req.Header[key] = value
        }
    }

    // RFC2616: Must treat
    //	GET /index.html HTTP/1.1
    //	Host: www.google.com
    // and
    //	GET http://www.google.com/index.html HTTP/1.1
    //	Host: doesntmatter
    // the same.  In the second case, any Host line is ignored.
    req.Host = req.URL.Host
    if req.Host == "" {
        req.Host = req.Header["Host"]
    }
    // Delete the Host entry from the map.
    req.Header["Host"] = "", false

    // fixPragmaCacheControl is defined outside this part of the file.
    fixPragmaCacheControl(req.Header)

    // Pull out useful fields as a convenience to clients.
    req.Referer = req.Header["Referer"]
    req.Header["Referer"] = "", false

    req.UserAgent = req.Header["User-Agent"]
    req.Header["User-Agent"] = "", false

    // TODO: Parse specific header values:
    //	Accept
    //	Accept-Encoding
    //	Accept-Language
    //	Authorization
    //	Cache-Control
    //	Connection
    //	Date
    //	Expect
    //	From
    //	If-Match
    //	If-Modified-Since
    //	If-None-Match
    //	If-Range
    //	If-Unmodified-Since
    //	Max-Forwards
    //	Proxy-Authorization
    //	Referer [sic]
    //	TE (transfer-codings)
    //	Trailer
    //	Transfer-Encoding
    //	Upgrade
    //	User-Agent
    //	Via
    //	Warning

    // readTransfer is defined outside this part of the file; presumably
    // it sets up Body and the transfer-related fields -- TODO confirm.
    err = readTransfer(req, b)
    if err != nil {
        return nil, err
    }

    return req, nil
}

func ParseQuery(query string) (m map[string][]string, err os.Error) {
    m = make(map[string][]string)
    for _, kv := range strings.Split(query, "&", -1) {
        kvPair := strings.Split(kv, "=", 2)

        var key, value string
        var e os.Error
        key, e = URLUnescape(kvPair[0])
        if e == nil && len(kvPair) > 1 {
            value, e = URLUnescape(kvPair[1])
        }
        if e != nil {
            err = e
        }

        vec := vector.StringVector(m[key])
        vec.Push(value)
        m[key] = vec
    }

    return
}

// ParseForm fills in r.Form. For GET requests it parses the URL's raw
// query; for POST requests it reads the whole body and parses it,
// provided the Content-Type (ignoring anything after ";") is text/plain,
// application/x-www-form-urlencoded, or absent. For any other method it
// parses an empty query.
//
// A POST with a nil Body, an unreadable body, or another Content-Type
// leaves r.Form as an empty map and returns an error.
// Once r.Form is non-nil, further calls do nothing, so it is idempotent.
func (r *Request) ParseForm() (err os.Error) {
    if r.Form != nil {
        return
    }

    var query string
    if r.Method == "GET" {
        query = r.URL.RawQuery
    } else if r.Method == "POST" {
        if r.Body == nil {
            r.Form = make(map[string][]string)
            return os.ErrorString("missing form body")
        }

        ct := r.Header["Content-Type"]
        mediaType := strings.Split(ct, ";", 2)[0]
        // TODO(dsymonds): Handle multipart/form-data
        if mediaType != "text/plain" && mediaType != "application/x-www-form-urlencoded" && mediaType != "" {
            r.Form = make(map[string][]string)
            return &badStringError{"unknown Content-Type", ct}
        }

        body, readErr := ioutil.ReadAll(r.Body)
        if readErr != nil {
            r.Form = make(map[string][]string)
            return readErr
        }
        query = string(body)
    }

    r.Form, err = ParseQuery(query)
    return
}

// FormValue returns the first value recorded for key in the parsed form,
// or "" when there is none. It calls ParseForm first if r.Form has not
// been populated yet; any error from ParseForm is ignored.
func (r *Request) FormValue(key string) string {
    if r.Form == nil {
        r.ParseForm()
    }
    values := r.Form[key]
    if len(values) == 0 {
        return ""
    }
    return values[0]
}

// expectsContinue reports whether the request carries an Expect header
// whose value, compared case-insensitively, is "100-continue".
func (r *Request) expectsContinue() bool {
    if expectation, present := r.Header["Expect"]; present {
        return strings.ToLower(expectation) == "100-continue"
    }
    return false
}