Run Format

Source file src/pkg/net/url/url.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package url parses URLs and implements query escaping.
     6	// See RFC 3986.
     7	package url
     8	
     9	import (
    10		"bytes"
    11		"errors"
    12		"sort"
    13		"strconv"
    14		"strings"
    15	)
    16	
    17	// Error reports an error and the operation and URL that caused it.
    18	type Error struct {
    19		Op  string
    20		URL string
    21		Err error
    22	}
    23	
    24	func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
    25	
    26	func ishex(c byte) bool {
    27		switch {
    28		case '0' <= c && c <= '9':
    29			return true
    30		case 'a' <= c && c <= 'f':
    31			return true
    32		case 'A' <= c && c <= 'F':
    33			return true
    34		}
    35		return false
    36	}
    37	
    38	func unhex(c byte) byte {
    39		switch {
    40		case '0' <= c && c <= '9':
    41			return c - '0'
    42		case 'a' <= c && c <= 'f':
    43			return c - 'a' + 10
    44		case 'A' <= c && c <= 'F':
    45			return c - 'A' + 10
    46		}
    47		return 0
    48	}
    49	
    50	type encoding int
    51	
    52	const (
    53		encodePath encoding = 1 + iota
    54		encodeUserPassword
    55		encodeQueryComponent
    56		encodeFragment
    57	)
    58	
    59	type EscapeError string
    60	
    61	func (e EscapeError) Error() string {
    62		return "invalid URL escape " + strconv.Quote(string(e))
    63	}
    64	
    65	// Return true if the specified character should be escaped when
    66	// appearing in a URL string, according to RFC 3986.
    67	// When 'all' is true the full range of reserved characters are matched.
    68	func shouldEscape(c byte, mode encoding) bool {
    69		// §2.3 Unreserved characters (alphanum)
    70		if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
    71			return false
    72		}
    73	
    74		switch c {
    75		case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
    76			return false
    77	
    78		case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
    79			// Different sections of the URL allow a few of
    80			// the reserved characters to appear unescaped.
    81			switch mode {
    82			case encodePath: // §3.3
    83				// The RFC allows : @ & = + $ but saves / ; , for assigning
    84				// meaning to individual path segments. This package
    85				// only manipulates the path as a whole, so we allow those
    86				// last two as well. That leaves only ? to escape.
    87				return c == '?'
    88	
    89			case encodeUserPassword: // §3.2.2
    90				// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
    91				// The parsing of userinfo treats : as special so we must escape that too.
    92				return c == '@' || c == '/' || c == ':'
    93	
    94			case encodeQueryComponent: // §3.4
    95				// The RFC reserves (so we must escape) everything.
    96				return true
    97	
    98			case encodeFragment: // §4.1
    99				// The RFC text is silent but the grammar allows
   100				// everything, so escape nothing.
   101				return false
   102			}
   103		}
   104	
   105		// Everything else must be escaped.
   106		return true
   107	}
   108	
   109	// QueryUnescape does the inverse transformation of QueryEscape, converting
   110	// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
   111	// any % is not followed by two hexadecimal digits.
   112	func QueryUnescape(s string) (string, error) {
   113		return unescape(s, encodeQueryComponent)
   114	}
   115	
   116	// unescape unescapes a string; the mode specifies
   117	// which section of the URL string is being unescaped.
   118	func unescape(s string, mode encoding) (string, error) {
   119		// Count %, check that they're well-formed.
   120		n := 0
   121		hasPlus := false
   122		for i := 0; i < len(s); {
   123			switch s[i] {
   124			case '%':
   125				n++
   126				if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   127					s = s[i:]
   128					if len(s) > 3 {
   129						s = s[0:3]
   130					}
   131					return "", EscapeError(s)
   132				}
   133				i += 3
   134			case '+':
   135				hasPlus = mode == encodeQueryComponent
   136				i++
   137			default:
   138				i++
   139			}
   140		}
   141	
   142		if n == 0 && !hasPlus {
   143			return s, nil
   144		}
   145	
   146		t := make([]byte, len(s)-2*n)
   147		j := 0
   148		for i := 0; i < len(s); {
   149			switch s[i] {
   150			case '%':
   151				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   152				j++
   153				i += 3
   154			case '+':
   155				if mode == encodeQueryComponent {
   156					t[j] = ' '
   157				} else {
   158					t[j] = '+'
   159				}
   160				j++
   161				i++
   162			default:
   163				t[j] = s[i]
   164				j++
   165				i++
   166			}
   167		}
   168		return string(t), nil
   169	}
   170	
   171	// QueryEscape escapes the string so it can be safely placed
   172	// inside a URL query.
   173	func QueryEscape(s string) string {
   174		return escape(s, encodeQueryComponent)
   175	}
   176	
   177	func escape(s string, mode encoding) string {
   178		spaceCount, hexCount := 0, 0
   179		for i := 0; i < len(s); i++ {
   180			c := s[i]
   181			if shouldEscape(c, mode) {
   182				if c == ' ' && mode == encodeQueryComponent {
   183					spaceCount++
   184				} else {
   185					hexCount++
   186				}
   187			}
   188		}
   189	
   190		if spaceCount == 0 && hexCount == 0 {
   191			return s
   192		}
   193	
   194		t := make([]byte, len(s)+2*hexCount)
   195		j := 0
   196		for i := 0; i < len(s); i++ {
   197			switch c := s[i]; {
   198			case c == ' ' && mode == encodeQueryComponent:
   199				t[j] = '+'
   200				j++
   201			case shouldEscape(c, mode):
   202				t[j] = '%'
   203				t[j+1] = "0123456789ABCDEF"[c>>4]
   204				t[j+2] = "0123456789ABCDEF"[c&15]
   205				j += 3
   206			default:
   207				t[j] = s[i]
   208				j++
   209			}
   210		}
   211		return string(t)
   212	}
   213	
   214	// A URL represents a parsed URL (technically, a URI reference).
   215	// The general form represented is:
   216	//
   217	//	scheme://[userinfo@]host/path[?query][#fragment]
   218	//
   219	// URLs that do not start with a slash after the scheme are interpreted as:
   220	//
   221	//	scheme:opaque[?query][#fragment]
   222	//
   223	// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
   224	// A consequence is that it is impossible to tell which slashes in the Path were
   225	// slashes in the raw URL and which were %2f. This distinction is rarely important,
   226	// but when it is a client must use other routines to parse the raw URL or construct
   227	// the parsed URL. For example, an HTTP server can consult req.RequestURI, and
   228	// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
   229	// instead of URL{Host: "example.com", Path: "/Go/"}.
   230	type URL struct {
   231		Scheme   string
   232		Opaque   string    // encoded opaque data
   233		User     *Userinfo // username and password information
   234		Host     string    // host or host:port
   235		Path     string
   236		RawQuery string // encoded query values, without '?'
   237		Fragment string // fragment for references, without '#'
   238	}
   239	
   240	// User returns a Userinfo containing the provided username
   241	// and no password set.
   242	func User(username string) *Userinfo {
   243		return &Userinfo{username, "", false}
   244	}
   245	
   246	// UserPassword returns a Userinfo containing the provided username
   247	// and password.
   248	// This functionality should only be used with legacy web sites.
   249	// RFC 2396 warns that interpreting Userinfo this way
   250	// ``is NOT RECOMMENDED, because the passing of authentication
   251	// information in clear text (such as URI) has proven to be a
   252	// security risk in almost every case where it has been used.''
   253	func UserPassword(username, password string) *Userinfo {
   254		return &Userinfo{username, password, true}
   255	}
   256	
   257	// The Userinfo type is an immutable encapsulation of username and
   258	// password details for a URL. An existing Userinfo value is guaranteed
   259	// to have a username set (potentially empty, as allowed by RFC 2396),
   260	// and optionally a password.
   261	type Userinfo struct {
   262		username    string
   263		password    string
   264		passwordSet bool
   265	}
   266	
   267	// Username returns the username.
   268	func (u *Userinfo) Username() string {
   269		return u.username
   270	}
   271	
   272	// Password returns the password in case it is set, and whether it is set.
   273	func (u *Userinfo) Password() (string, bool) {
   274		if u.passwordSet {
   275			return u.password, true
   276		}
   277		return "", false
   278	}
   279	
   280	// String returns the encoded userinfo information in the standard form
   281	// of "username[:password]".
   282	func (u *Userinfo) String() string {
   283		s := escape(u.username, encodeUserPassword)
   284		if u.passwordSet {
   285			s += ":" + escape(u.password, encodeUserPassword)
   286		}
   287		return s
   288	}
   289	
   290	// Maybe rawurl is of the form scheme:path.
   291	// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
   292	// If so, return scheme, path; else return "", rawurl.
   293	func getscheme(rawurl string) (scheme, path string, err error) {
   294		for i := 0; i < len(rawurl); i++ {
   295			c := rawurl[i]
   296			switch {
   297			case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
   298			// do nothing
   299			case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
   300				if i == 0 {
   301					return "", rawurl, nil
   302				}
   303			case c == ':':
   304				if i == 0 {
   305					return "", "", errors.New("missing protocol scheme")
   306				}
   307				return rawurl[0:i], rawurl[i+1:], nil
   308			default:
   309				// we have encountered an invalid character,
   310				// so there is no valid scheme
   311				return "", rawurl, nil
   312			}
   313		}
   314		return "", rawurl, nil
   315	}
   316	
   317	// Maybe s is of the form t c u.
   318	// If so, return t, c u (or t, u if cutc == true).
   319	// If not, return s, "".
   320	func split(s string, c string, cutc bool) (string, string) {
   321		i := strings.Index(s, c)
   322		if i < 0 {
   323			return s, ""
   324		}
   325		if cutc {
   326			return s[0:i], s[i+len(c):]
   327		}
   328		return s[0:i], s[i:]
   329	}
   330	
   331	// Parse parses rawurl into a URL structure.
   332	// The rawurl may be relative or absolute.
   333	func Parse(rawurl string) (url *URL, err error) {
   334		// Cut off #frag
   335		u, frag := split(rawurl, "#", true)
   336		if url, err = parse(u, false); err != nil {
   337			return nil, err
   338		}
   339		if frag == "" {
   340			return url, nil
   341		}
   342		if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
   343			return nil, &Error{"parse", rawurl, err}
   344		}
   345		return url, nil
   346	}
   347	
   348	// ParseRequestURI parses rawurl into a URL structure.  It assumes that
   349	// rawurl was received in an HTTP request, so the rawurl is interpreted
   350	// only as an absolute URI or an absolute path.
   351	// The string rawurl is assumed not to have a #fragment suffix.
   352	// (Web browsers strip #fragment before sending the URL to a web server.)
   353	func ParseRequestURI(rawurl string) (url *URL, err error) {
   354		return parse(rawurl, true)
   355	}
   356	
   357	// parse parses a URL from a string in one of two contexts.  If
   358	// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
   359	// in which case only absolute URLs or path-absolute relative URLs are allowed.
   360	// If viaRequest is false, all forms of relative URLs are allowed.
   361	func parse(rawurl string, viaRequest bool) (url *URL, err error) {
   362		var rest string
   363	
   364		if rawurl == "" && viaRequest {
   365			err = errors.New("empty url")
   366			goto Error
   367		}
   368		url = new(URL)
   369	
   370		if rawurl == "*" {
   371			url.Path = "*"
   372			return
   373		}
   374	
   375		// Split off possible leading "http:", "mailto:", etc.
   376		// Cannot contain escaped characters.
   377		if url.Scheme, rest, err = getscheme(rawurl); err != nil {
   378			goto Error
   379		}
   380		url.Scheme = strings.ToLower(url.Scheme)
   381	
   382		rest, url.RawQuery = split(rest, "?", true)
   383	
   384		if !strings.HasPrefix(rest, "/") {
   385			if url.Scheme != "" {
   386				// We consider rootless paths per RFC 3986 as opaque.
   387				url.Opaque = rest
   388				return url, nil
   389			}
   390			if viaRequest {
   391				err = errors.New("invalid URI for request")
   392				goto Error
   393			}
   394		}
   395	
   396		if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
   397			var authority string
   398			authority, rest = split(rest[2:], "/", false)
   399			url.User, url.Host, err = parseAuthority(authority)
   400			if err != nil {
   401				goto Error
   402			}
   403			if strings.Contains(url.Host, "%") {
   404				err = errors.New("hexadecimal escape in host")
   405				goto Error
   406			}
   407		}
   408		if url.Path, err = unescape(rest, encodePath); err != nil {
   409			goto Error
   410		}
   411		return url, nil
   412	
   413	Error:
   414		return nil, &Error{"parse", rawurl, err}
   415	}
   416	
   417	func parseAuthority(authority string) (user *Userinfo, host string, err error) {
   418		i := strings.LastIndex(authority, "@")
   419		if i < 0 {
   420			host = authority
   421			return
   422		}
   423		userinfo, host := authority[:i], authority[i+1:]
   424		if strings.Index(userinfo, ":") < 0 {
   425			if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
   426				return
   427			}
   428			user = User(userinfo)
   429		} else {
   430			username, password := split(userinfo, ":", true)
   431			if username, err = unescape(username, encodeUserPassword); err != nil {
   432				return
   433			}
   434			if password, err = unescape(password, encodeUserPassword); err != nil {
   435				return
   436			}
   437			user = UserPassword(username, password)
   438		}
   439		return
   440	}
   441	
   442	// String reassembles the URL into a valid URL string.
   443	func (u *URL) String() string {
   444		var buf bytes.Buffer
   445		if u.Scheme != "" {
   446			buf.WriteString(u.Scheme)
   447			buf.WriteByte(':')
   448		}
   449		if u.Opaque != "" {
   450			buf.WriteString(u.Opaque)
   451		} else {
   452			if u.Scheme != "" || u.Host != "" || u.User != nil {
   453				buf.WriteString("//")
   454				if ui := u.User; ui != nil {
   455					buf.WriteString(ui.String())
   456					buf.WriteByte('@')
   457				}
   458				if h := u.Host; h != "" {
   459					buf.WriteString(h)
   460				}
   461			}
   462			if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
   463				buf.WriteByte('/')
   464			}
   465			buf.WriteString(escape(u.Path, encodePath))
   466		}
   467		if u.RawQuery != "" {
   468			buf.WriteByte('?')
   469			buf.WriteString(u.RawQuery)
   470		}
   471		if u.Fragment != "" {
   472			buf.WriteByte('#')
   473			buf.WriteString(escape(u.Fragment, encodeFragment))
   474		}
   475		return buf.String()
   476	}
   477	
   478	// Values maps a string key to a list of values.
   479	// It is typically used for query parameters and form values.
   480	// Unlike in the http.Header map, the keys in a Values map
   481	// are case-sensitive.
   482	type Values map[string][]string
   483	
   484	// Get gets the first value associated with the given key.
   485	// If there are no values associated with the key, Get returns
   486	// the empty string. To access multiple values, use the map
   487	// directly.
   488	func (v Values) Get(key string) string {
   489		if v == nil {
   490			return ""
   491		}
   492		vs, ok := v[key]
   493		if !ok || len(vs) == 0 {
   494			return ""
   495		}
   496		return vs[0]
   497	}
   498	
   499	// Set sets the key to value. It replaces any existing
   500	// values.
   501	func (v Values) Set(key, value string) {
   502		v[key] = []string{value}
   503	}
   504	
   505	// Add adds the key to value. It appends to any existing
   506	// values associated with key.
   507	func (v Values) Add(key, value string) {
   508		v[key] = append(v[key], value)
   509	}
   510	
   511	// Del deletes the values associated with key.
   512	func (v Values) Del(key string) {
   513		delete(v, key)
   514	}
   515	
   516	// ParseQuery parses the URL-encoded query string and returns
   517	// a map listing the values specified for each key.
   518	// ParseQuery always returns a non-nil map containing all the
   519	// valid query parameters found; err describes the first decoding error
   520	// encountered, if any.
   521	func ParseQuery(query string) (m Values, err error) {
   522		m = make(Values)
   523		err = parseQuery(m, query)
   524		return
   525	}
   526	
   527	func parseQuery(m Values, query string) (err error) {
   528		for query != "" {
   529			key := query
   530			if i := strings.IndexAny(key, "&;"); i >= 0 {
   531				key, query = key[:i], key[i+1:]
   532			} else {
   533				query = ""
   534			}
   535			if key == "" {
   536				continue
   537			}
   538			value := ""
   539			if i := strings.Index(key, "="); i >= 0 {
   540				key, value = key[:i], key[i+1:]
   541			}
   542			key, err1 := QueryUnescape(key)
   543			if err1 != nil {
   544				if err == nil {
   545					err = err1
   546				}
   547				continue
   548			}
   549			value, err1 = QueryUnescape(value)
   550			if err1 != nil {
   551				if err == nil {
   552					err = err1
   553				}
   554				continue
   555			}
   556			m[key] = append(m[key], value)
   557		}
   558		return err
   559	}
   560	
   561	// Encode encodes the values into ``URL encoded'' form
   562	// ("bar=baz&foo=quux") sorted by key.
   563	func (v Values) Encode() string {
   564		if v == nil {
   565			return ""
   566		}
   567		var buf bytes.Buffer
   568		keys := make([]string, 0, len(v))
   569		for k := range v {
   570			keys = append(keys, k)
   571		}
   572		sort.Strings(keys)
   573		for _, k := range keys {
   574			vs := v[k]
   575			prefix := QueryEscape(k) + "="
   576			for _, v := range vs {
   577				if buf.Len() > 0 {
   578					buf.WriteByte('&')
   579				}
   580				buf.WriteString(prefix)
   581				buf.WriteString(QueryEscape(v))
   582			}
   583		}
   584		return buf.String()
   585	}
   586	
   587	// resolvePath applies special path segments from refs and applies
   588	// them to base, per RFC 3986.
   589	func resolvePath(base, ref string) string {
   590		var full string
   591		if ref == "" {
   592			full = base
   593		} else if ref[0] != '/' {
   594			i := strings.LastIndex(base, "/")
   595			full = base[:i+1] + ref
   596		} else {
   597			full = ref
   598		}
   599		if full == "" {
   600			return ""
   601		}
   602		var dst []string
   603		src := strings.Split(full, "/")
   604		for _, elem := range src {
   605			switch elem {
   606			case ".":
   607				// drop
   608			case "..":
   609				if len(dst) > 0 {
   610					dst = dst[:len(dst)-1]
   611				}
   612			default:
   613				dst = append(dst, elem)
   614			}
   615		}
   616		if last := src[len(src)-1]; last == "." || last == ".." {
   617			// Add final slash to the joined path.
   618			dst = append(dst, "")
   619		}
   620		return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/")
   621	}
   622	
   623	// IsAbs returns true if the URL is absolute.
   624	func (u *URL) IsAbs() bool {
   625		return u.Scheme != ""
   626	}
   627	
   628	// Parse parses a URL in the context of the receiver.  The provided URL
   629	// may be relative or absolute.  Parse returns nil, err on parse
   630	// failure, otherwise its return value is the same as ResolveReference.
   631	func (u *URL) Parse(ref string) (*URL, error) {
   632		refurl, err := Parse(ref)
   633		if err != nil {
   634			return nil, err
   635		}
   636		return u.ResolveReference(refurl), nil
   637	}
   638	
   639	// ResolveReference resolves a URI reference to an absolute URI from
   640	// an absolute base URI, per RFC 3986 Section 5.2.  The URI reference
   641	// may be relative or absolute.  ResolveReference always returns a new
   642	// URL instance, even if the returned URL is identical to either the
   643	// base or reference. If ref is an absolute URL, then ResolveReference
   644	// ignores base and returns a copy of ref.
   645	func (u *URL) ResolveReference(ref *URL) *URL {
   646		url := *ref
   647		if ref.Scheme == "" {
   648			url.Scheme = u.Scheme
   649		}
   650		if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
   651			// The "absoluteURI" or "net_path" cases.
   652			url.Path = resolvePath(ref.Path, "")
   653			return &url
   654		}
   655		if ref.Opaque != "" {
   656			url.User = nil
   657			url.Host = ""
   658			url.Path = ""
   659			return &url
   660		}
   661		if ref.Path == "" {
   662			if ref.RawQuery == "" {
   663				url.RawQuery = u.RawQuery
   664				if ref.Fragment == "" {
   665					url.Fragment = u.Fragment
   666				}
   667			}
   668		}
   669		// The "abs_path" or "rel_path" cases.
   670		url.Host = u.Host
   671		url.User = u.User
   672		url.Path = resolvePath(u.Path, ref.Path)
   673		return &url
   674	}
   675	
   676	// Query parses RawQuery and returns the corresponding values.
   677	func (u *URL) Query() Values {
   678		v, _ := ParseQuery(u.RawQuery)
   679		return v
   680	}
   681	
   682	// RequestURI returns the encoded path?query or opaque?query
   683	// string that would be used in an HTTP request for u.
   684	func (u *URL) RequestURI() string {
   685		result := u.Opaque
   686		if result == "" {
   687			result = escape(u.Path, encodePath)
   688			if result == "" {
   689				result = "/"
   690			}
   691		} else {
   692			if strings.HasPrefix(result, "//") {
   693				result = u.Scheme + ":" + result
   694			}
   695		}
   696		if u.RawQuery != "" {
   697			result += "?" + u.RawQuery
   698		}
   699		return result
   700	}

View as plain text