Run Format

Source file src/pkg/net/url/url.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package url parses URLs and implements query escaping.
     6	// See RFC 3986.
     7	package url
     8	
     9	import (
    10		"bytes"
    11		"errors"
    12		"sort"
    13		"strconv"
    14		"strings"
    15	)
    16	
    17	// Error reports an error and the operation and URL that caused it.
    18	type Error struct {
    19		Op  string
    20		URL string
    21		Err error
    22	}
    23	
    24	func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
    25	
    26	func ishex(c byte) bool {
    27		switch {
    28		case '0' <= c && c <= '9':
    29			return true
    30		case 'a' <= c && c <= 'f':
    31			return true
    32		case 'A' <= c && c <= 'F':
    33			return true
    34		}
    35		return false
    36	}
    37	
    38	func unhex(c byte) byte {
    39		switch {
    40		case '0' <= c && c <= '9':
    41			return c - '0'
    42		case 'a' <= c && c <= 'f':
    43			return c - 'a' + 10
    44		case 'A' <= c && c <= 'F':
    45			return c - 'A' + 10
    46		}
    47		return 0
    48	}
    49	
    50	type encoding int
    51	
    52	const (
    53		encodePath encoding = 1 + iota
    54		encodeUserPassword
    55		encodeQueryComponent
    56		encodeFragment
    57	)
    58	
    59	type EscapeError string
    60	
    61	func (e EscapeError) Error() string {
    62		return "invalid URL escape " + strconv.Quote(string(e))
    63	}
    64	
    65	// Return true if the specified character should be escaped when
    66	// appearing in a URL string, according to RFC 3986.
    67	// When 'all' is true the full range of reserved characters are matched.
    68	func shouldEscape(c byte, mode encoding) bool {
    69		// §2.3 Unreserved characters (alphanum)
    70		if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
    71			return false
    72		}
    73	
    74		switch c {
    75		case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
    76			return false
    77	
    78		case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
    79			// Different sections of the URL allow a few of
    80			// the reserved characters to appear unescaped.
    81			switch mode {
    82			case encodePath: // §3.3
    83				// The RFC allows : @ & = + $ but saves / ; , for assigning
    84				// meaning to individual path segments. This package
    85				// only manipulates the path as a whole, so we allow those
    86				// last two as well. That leaves only ? to escape.
    87				return c == '?'
    88	
    89			case encodeUserPassword: // §3.2.2
    90				// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
    91				// The parsing of userinfo treats : as special so we must escape that too.
    92				return c == '@' || c == '/' || c == ':'
    93	
    94			case encodeQueryComponent: // §3.4
    95				// The RFC reserves (so we must escape) everything.
    96				return true
    97	
    98			case encodeFragment: // §4.1
    99				// The RFC text is silent but the grammar allows
   100				// everything, so escape nothing.
   101				return false
   102			}
   103		}
   104	
   105		// Everything else must be escaped.
   106		return true
   107	}
   108	
   109	// QueryUnescape does the inverse transformation of QueryEscape, converting
   110	// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
   111	// any % is not followed by two hexadecimal digits.
   112	func QueryUnescape(s string) (string, error) {
   113		return unescape(s, encodeQueryComponent)
   114	}
   115	
   116	// unescape unescapes a string; the mode specifies
   117	// which section of the URL string is being unescaped.
   118	func unescape(s string, mode encoding) (string, error) {
   119		// Count %, check that they're well-formed.
   120		n := 0
   121		hasPlus := false
   122		for i := 0; i < len(s); {
   123			switch s[i] {
   124			case '%':
   125				n++
   126				if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   127					s = s[i:]
   128					if len(s) > 3 {
   129						s = s[0:3]
   130					}
   131					return "", EscapeError(s)
   132				}
   133				i += 3
   134			case '+':
   135				hasPlus = mode == encodeQueryComponent
   136				i++
   137			default:
   138				i++
   139			}
   140		}
   141	
   142		if n == 0 && !hasPlus {
   143			return s, nil
   144		}
   145	
   146		t := make([]byte, len(s)-2*n)
   147		j := 0
   148		for i := 0; i < len(s); {
   149			switch s[i] {
   150			case '%':
   151				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   152				j++
   153				i += 3
   154			case '+':
   155				if mode == encodeQueryComponent {
   156					t[j] = ' '
   157				} else {
   158					t[j] = '+'
   159				}
   160				j++
   161				i++
   162			default:
   163				t[j] = s[i]
   164				j++
   165				i++
   166			}
   167		}
   168		return string(t), nil
   169	}
   170	
   171	// QueryEscape escapes the string so it can be safely placed
   172	// inside a URL query.
   173	func QueryEscape(s string) string {
   174		return escape(s, encodeQueryComponent)
   175	}
   176	
   177	func escape(s string, mode encoding) string {
   178		spaceCount, hexCount := 0, 0
   179		for i := 0; i < len(s); i++ {
   180			c := s[i]
   181			if shouldEscape(c, mode) {
   182				if c == ' ' && mode == encodeQueryComponent {
   183					spaceCount++
   184				} else {
   185					hexCount++
   186				}
   187			}
   188		}
   189	
   190		if spaceCount == 0 && hexCount == 0 {
   191			return s
   192		}
   193	
   194		t := make([]byte, len(s)+2*hexCount)
   195		j := 0
   196		for i := 0; i < len(s); i++ {
   197			switch c := s[i]; {
   198			case c == ' ' && mode == encodeQueryComponent:
   199				t[j] = '+'
   200				j++
   201			case shouldEscape(c, mode):
   202				t[j] = '%'
   203				t[j+1] = "0123456789ABCDEF"[c>>4]
   204				t[j+2] = "0123456789ABCDEF"[c&15]
   205				j += 3
   206			default:
   207				t[j] = s[i]
   208				j++
   209			}
   210		}
   211		return string(t)
   212	}
   213	
   214	// A URL represents a parsed URL (technically, a URI reference).
   215	// The general form represented is:
   216	//
   217	//	scheme://[userinfo@]host/path[?query][#fragment]
   218	//
   219	// URLs that do not start with a slash after the scheme are interpreted as:
   220	//
   221	//	scheme:opaque[?query][#fragment]
   222	//
   223	// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
   224	// A consequence is that it is impossible to tell which slashes in the Path were
   225	// slashes in the raw URL and which were %2f. This distinction is rarely important,
   226	// but when it is a client must use other routines to parse the raw URL or construct
   227	// the parsed URL. For example, an HTTP server can consult req.RequestURI, and
   228	// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
   229	// instead of URL{Host: "example.com", Path: "/Go/"}.
   230	type URL struct {
   231		Scheme   string
   232		Opaque   string    // encoded opaque data
   233		User     *Userinfo // username and password information
   234		Host     string    // host or host:port
   235		Path     string
   236		RawQuery string // encoded query values, without '?'
   237		Fragment string // fragment for references, without '#'
   238	}
   239	
   240	// User returns a Userinfo containing the provided username
   241	// and no password set.
   242	func User(username string) *Userinfo {
   243		return &Userinfo{username, "", false}
   244	}
   245	
   246	// UserPassword returns a Userinfo containing the provided username
   247	// and password.
   248	// This functionality should only be used with legacy web sites.
   249	// RFC 2396 warns that interpreting Userinfo this way
   250	// ``is NOT RECOMMENDED, because the passing of authentication
   251	// information in clear text (such as URI) has proven to be a
   252	// security risk in almost every case where it has been used.''
   253	func UserPassword(username, password string) *Userinfo {
   254		return &Userinfo{username, password, true}
   255	}
   256	
   257	// The Userinfo type is an immutable encapsulation of username and
   258	// password details for a URL. An existing Userinfo value is guaranteed
   259	// to have a username set (potentially empty, as allowed by RFC 2396),
   260	// and optionally a password.
   261	type Userinfo struct {
   262		username    string
   263		password    string
   264		passwordSet bool
   265	}
   266	
   267	// Username returns the username.
   268	func (u *Userinfo) Username() string {
   269		return u.username
   270	}
   271	
   272	// Password returns the password in case it is set, and whether it is set.
   273	func (u *Userinfo) Password() (string, bool) {
   274		if u.passwordSet {
   275			return u.password, true
   276		}
   277		return "", false
   278	}
   279	
   280	// String returns the encoded userinfo information in the standard form
   281	// of "username[:password]".
   282	func (u *Userinfo) String() string {
   283		s := escape(u.username, encodeUserPassword)
   284		if u.passwordSet {
   285			s += ":" + escape(u.password, encodeUserPassword)
   286		}
   287		return s
   288	}
   289	
   290	// Maybe rawurl is of the form scheme:path.
   291	// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
   292	// If so, return scheme, path; else return "", rawurl.
   293	func getscheme(rawurl string) (scheme, path string, err error) {
   294		for i := 0; i < len(rawurl); i++ {
   295			c := rawurl[i]
   296			switch {
   297			case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
   298			// do nothing
   299			case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
   300				if i == 0 {
   301					return "", rawurl, nil
   302				}
   303			case c == ':':
   304				if i == 0 {
   305					return "", "", errors.New("missing protocol scheme")
   306				}
   307				return rawurl[0:i], rawurl[i+1:], nil
   308			default:
   309				// we have encountered an invalid character,
   310				// so there is no valid scheme
   311				return "", rawurl, nil
   312			}
   313		}
   314		return "", rawurl, nil
   315	}
   316	
   317	// Maybe s is of the form t c u.
   318	// If so, return t, c u (or t, u if cutc == true).
   319	// If not, return s, "".
   320	func split(s string, c string, cutc bool) (string, string) {
   321		i := strings.Index(s, c)
   322		if i < 0 {
   323			return s, ""
   324		}
   325		if cutc {
   326			return s[0:i], s[i+len(c):]
   327		}
   328		return s[0:i], s[i:]
   329	}
   330	
   331	// Parse parses rawurl into a URL structure.
   332	// The rawurl may be relative or absolute.
   333	func Parse(rawurl string) (url *URL, err error) {
   334		// Cut off #frag
   335		u, frag := split(rawurl, "#", true)
   336		if url, err = parse(u, false); err != nil {
   337			return nil, err
   338		}
   339		if frag == "" {
   340			return url, nil
   341		}
   342		if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
   343			return nil, &Error{"parse", rawurl, err}
   344		}
   345		return url, nil
   346	}
   347	
   348	// ParseRequestURI parses rawurl into a URL structure.  It assumes that
   349	// rawurl was received in an HTTP request, so the rawurl is interpreted
   350	// only as an absolute URI or an absolute path.
   351	// The string rawurl is assumed not to have a #fragment suffix.
   352	// (Web browsers strip #fragment before sending the URL to a web server.)
   353	func ParseRequestURI(rawurl string) (url *URL, err error) {
   354		return parse(rawurl, true)
   355	}
   356	
   357	// parse parses a URL from a string in one of two contexts.  If
   358	// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
   359	// in which case only absolute URLs or path-absolute relative URLs are allowed.
   360	// If viaRequest is false, all forms of relative URLs are allowed.
   361	func parse(rawurl string, viaRequest bool) (url *URL, err error) {
   362		var rest string
   363	
   364		if rawurl == "" && viaRequest {
   365			err = errors.New("empty url")
   366			goto Error
   367		}
   368		url = new(URL)
   369	
   370		if rawurl == "*" {
   371			url.Path = "*"
   372			return
   373		}
   374	
   375		// Split off possible leading "http:", "mailto:", etc.
   376		// Cannot contain escaped characters.
   377		if url.Scheme, rest, err = getscheme(rawurl); err != nil {
   378			goto Error
   379		}
   380		url.Scheme = strings.ToLower(url.Scheme)
   381	
   382		rest, url.RawQuery = split(rest, "?", true)
   383	
   384		if !strings.HasPrefix(rest, "/") {
   385			if url.Scheme != "" {
   386				// We consider rootless paths per RFC 3986 as opaque.
   387				url.Opaque = rest
   388				return url, nil
   389			}
   390			if viaRequest {
   391				err = errors.New("invalid URI for request")
   392				goto Error
   393			}
   394		}
   395	
   396		if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
   397			var authority string
   398			authority, rest = split(rest[2:], "/", false)
   399			url.User, url.Host, err = parseAuthority(authority)
   400			if err != nil {
   401				goto Error
   402			}
   403			if strings.Contains(url.Host, "%") {
   404				err = errors.New("hexadecimal escape in host")
   405				goto Error
   406			}
   407		}
   408		if url.Path, err = unescape(rest, encodePath); err != nil {
   409			goto Error
   410		}
   411		return url, nil
   412	
   413	Error:
   414		return nil, &Error{"parse", rawurl, err}
   415	}
   416	
   417	func parseAuthority(authority string) (user *Userinfo, host string, err error) {
   418		i := strings.LastIndex(authority, "@")
   419		if i < 0 {
   420			host = authority
   421			return
   422		}
   423		userinfo, host := authority[:i], authority[i+1:]
   424		if strings.Index(userinfo, ":") < 0 {
   425			if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
   426				return
   427			}
   428			user = User(userinfo)
   429		} else {
   430			username, password := split(userinfo, ":", true)
   431			if username, err = unescape(username, encodeUserPassword); err != nil {
   432				return
   433			}
   434			if password, err = unescape(password, encodeUserPassword); err != nil {
   435				return
   436			}
   437			user = UserPassword(username, password)
   438		}
   439		return
   440	}
   441	
   442	// String reassembles the URL into a valid URL string.
   443	func (u *URL) String() string {
   444		var buf bytes.Buffer
   445		if u.Scheme != "" {
   446			buf.WriteString(u.Scheme)
   447			buf.WriteByte(':')
   448		}
   449		if u.Opaque != "" {
   450			buf.WriteString(u.Opaque)
   451		} else {
   452			if u.Scheme != "" || u.Host != "" || u.User != nil {
   453				buf.WriteString("//")
   454				if u := u.User; u != nil {
   455					buf.WriteString(u.String())
   456					buf.WriteByte('@')
   457				}
   458				if h := u.Host; h != "" {
   459					buf.WriteString(h)
   460				}
   461			}
   462			buf.WriteString(escape(u.Path, encodePath))
   463		}
   464		if u.RawQuery != "" {
   465			buf.WriteByte('?')
   466			buf.WriteString(u.RawQuery)
   467		}
   468		if u.Fragment != "" {
   469			buf.WriteByte('#')
   470			buf.WriteString(escape(u.Fragment, encodeFragment))
   471		}
   472		return buf.String()
   473	}
   474	
   475	// Values maps a string key to a list of values.
   476	// It is typically used for query parameters and form values.
   477	// Unlike in the http.Header map, the keys in a Values map
   478	// are case-sensitive.
   479	type Values map[string][]string
   480	
   481	// Get gets the first value associated with the given key.
   482	// If there are no values associated with the key, Get returns
   483	// the empty string. To access multiple values, use the map
   484	// directly.
   485	func (v Values) Get(key string) string {
   486		if v == nil {
   487			return ""
   488		}
   489		vs, ok := v[key]
   490		if !ok || len(vs) == 0 {
   491			return ""
   492		}
   493		return vs[0]
   494	}
   495	
   496	// Set sets the key to value. It replaces any existing
   497	// values.
   498	func (v Values) Set(key, value string) {
   499		v[key] = []string{value}
   500	}
   501	
   502	// Add adds the key to value. It appends to any existing
   503	// values associated with key.
   504	func (v Values) Add(key, value string) {
   505		v[key] = append(v[key], value)
   506	}
   507	
   508	// Del deletes the values associated with key.
   509	func (v Values) Del(key string) {
   510		delete(v, key)
   511	}
   512	
   513	// ParseQuery parses the URL-encoded query string and returns
   514	// a map listing the values specified for each key.
   515	// ParseQuery always returns a non-nil map containing all the
   516	// valid query parameters found; err describes the first decoding error
   517	// encountered, if any.
   518	func ParseQuery(query string) (m Values, err error) {
   519		m = make(Values)
   520		err = parseQuery(m, query)
   521		return
   522	}
   523	
   524	func parseQuery(m Values, query string) (err error) {
   525		for query != "" {
   526			key := query
   527			if i := strings.IndexAny(key, "&;"); i >= 0 {
   528				key, query = key[:i], key[i+1:]
   529			} else {
   530				query = ""
   531			}
   532			if key == "" {
   533				continue
   534			}
   535			value := ""
   536			if i := strings.Index(key, "="); i >= 0 {
   537				key, value = key[:i], key[i+1:]
   538			}
   539			key, err1 := QueryUnescape(key)
   540			if err1 != nil {
   541				if err == nil {
   542					err = err1
   543				}
   544				continue
   545			}
   546			value, err1 = QueryUnescape(value)
   547			if err1 != nil {
   548				if err == nil {
   549					err = err1
   550				}
   551				continue
   552			}
   553			m[key] = append(m[key], value)
   554		}
   555		return err
   556	}
   557	
   558	// Encode encodes the values into ``URL encoded'' form.
   559	// e.g. "foo=bar&bar=baz"
   560	func (v Values) Encode() string {
   561		if v == nil {
   562			return ""
   563		}
   564		var buf bytes.Buffer
   565		keys := make([]string, 0, len(v))
   566		for k := range v {
   567			keys = append(keys, k)
   568		}
   569		sort.Strings(keys)
   570		for _, k := range keys {
   571			vs := v[k]
   572			prefix := QueryEscape(k) + "="
   573			for _, v := range vs {
   574				if buf.Len() > 0 {
   575					buf.WriteByte('&')
   576				}
   577				buf.WriteString(prefix)
   578				buf.WriteString(QueryEscape(v))
   579			}
   580		}
   581		return buf.String()
   582	}
   583	
   584	// resolvePath applies special path segments from refs and applies
   585	// them to base, per RFC 3986.
   586	func resolvePath(base, ref string) string {
   587		var full string
   588		if ref == "" {
   589			full = base
   590		} else if ref[0] != '/' {
   591			i := strings.LastIndex(base, "/")
   592			full = base[:i+1] + ref
   593		} else {
   594			full = ref
   595		}
   596		if full == "" {
   597			return ""
   598		}
   599		var dst []string
   600		src := strings.Split(full, "/")
   601		for _, elem := range src {
   602			switch elem {
   603			case ".":
   604				// drop
   605			case "..":
   606				if len(dst) > 0 {
   607					dst = dst[:len(dst)-1]
   608				}
   609			default:
   610				dst = append(dst, elem)
   611			}
   612		}
   613		if last := src[len(src)-1]; last == "." || last == ".." {
   614			// Add final slash to the joined path.
   615			dst = append(dst, "")
   616		}
   617		return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/")
   618	}
   619	
   620	// IsAbs returns true if the URL is absolute.
   621	func (u *URL) IsAbs() bool {
   622		return u.Scheme != ""
   623	}
   624	
   625	// Parse parses a URL in the context of the receiver.  The provided URL
   626	// may be relative or absolute.  Parse returns nil, err on parse
   627	// failure, otherwise its return value is the same as ResolveReference.
   628	func (u *URL) Parse(ref string) (*URL, error) {
   629		refurl, err := Parse(ref)
   630		if err != nil {
   631			return nil, err
   632		}
   633		return u.ResolveReference(refurl), nil
   634	}
   635	
   636	// ResolveReference resolves a URI reference to an absolute URI from
   637	// an absolute base URI, per RFC 3986 Section 5.2.  The URI reference
   638	// may be relative or absolute.  ResolveReference always returns a new
   639	// URL instance, even if the returned URL is identical to either the
   640	// base or reference. If ref is an absolute URL, then ResolveReference
   641	// ignores base and returns a copy of ref.
   642	func (u *URL) ResolveReference(ref *URL) *URL {
   643		url := *ref
   644		if ref.Scheme == "" {
   645			url.Scheme = u.Scheme
   646		}
   647		if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
   648			// The "absoluteURI" or "net_path" cases.
   649			url.Path = resolvePath(ref.Path, "")
   650			return &url
   651		}
   652		if ref.Opaque != "" {
   653			url.User = nil
   654			url.Host = ""
   655			url.Path = ""
   656			return &url
   657		}
   658		if ref.Path == "" {
   659			if ref.RawQuery == "" {
   660				url.RawQuery = u.RawQuery
   661				if ref.Fragment == "" {
   662					url.Fragment = u.Fragment
   663				}
   664			}
   665		}
   666		// The "abs_path" or "rel_path" cases.
   667		url.Host = u.Host
   668		url.User = u.User
   669		url.Path = resolvePath(u.Path, ref.Path)
   670		return &url
   671	}
   672	
   673	// Query parses RawQuery and returns the corresponding values.
   674	func (u *URL) Query() Values {
   675		v, _ := ParseQuery(u.RawQuery)
   676		return v
   677	}
   678	
   679	// RequestURI returns the encoded path?query or opaque?query
   680	// string that would be used in an HTTP request for u.
   681	func (u *URL) RequestURI() string {
   682		result := u.Opaque
   683		if result == "" {
   684			result = escape(u.Path, encodePath)
   685			if result == "" {
   686				result = "/"
   687			}
   688		} else {
   689			if strings.HasPrefix(result, "//") {
   690				result = u.Scheme + ":" + result
   691			}
   692		}
   693		if u.RawQuery != "" {
   694			result += "?" + u.RawQuery
   695		}
   696		return result
   697	}

View as plain text