...
Run Format

Source file src/net/url/url.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package url parses URLs and implements query escaping.
     6	// See RFC 3986.
     7	package url
     8	
     9	import (
    10		"bytes"
    11		"errors"
    12		"sort"
    13		"strconv"
    14		"strings"
    15	)
    16	
    17	// Error reports an error and the operation and URL that caused it.
    18	type Error struct {
    19		Op  string
    20		URL string
    21		Err error
    22	}
    23	
    24	func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
    25	
    26	func ishex(c byte) bool {
    27		switch {
    28		case '0' <= c && c <= '9':
    29			return true
    30		case 'a' <= c && c <= 'f':
    31			return true
    32		case 'A' <= c && c <= 'F':
    33			return true
    34		}
    35		return false
    36	}
    37	
    38	func unhex(c byte) byte {
    39		switch {
    40		case '0' <= c && c <= '9':
    41			return c - '0'
    42		case 'a' <= c && c <= 'f':
    43			return c - 'a' + 10
    44		case 'A' <= c && c <= 'F':
    45			return c - 'A' + 10
    46		}
    47		return 0
    48	}
    49	
    50	type encoding int
    51	
    52	const (
    53		encodePath encoding = 1 + iota
    54		encodeUserPassword
    55		encodeQueryComponent
    56		encodeFragment
    57	)
    58	
    59	type EscapeError string
    60	
    61	func (e EscapeError) Error() string {
    62		return "invalid URL escape " + strconv.Quote(string(e))
    63	}
    64	
    65	// Return true if the specified character should be escaped when
    66	// appearing in a URL string, according to RFC 3986.
    67	func shouldEscape(c byte, mode encoding) bool {
    68		// §2.3 Unreserved characters (alphanum)
    69		if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
    70			return false
    71		}
    72	
    73		switch c {
    74		case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
    75			return false
    76	
    77		case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
    78			// Different sections of the URL allow a few of
    79			// the reserved characters to appear unescaped.
    80			switch mode {
    81			case encodePath: // §3.3
    82				// The RFC allows : @ & = + $ but saves / ; , for assigning
    83				// meaning to individual path segments. This package
    84				// only manipulates the path as a whole, so we allow those
    85				// last two as well. That leaves only ? to escape.
    86				return c == '?'
    87	
    88			case encodeUserPassword: // §3.2.1
    89				// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
    90				// userinfo, so we must escape only '@', '/', and '?'.
    91				// The parsing of userinfo treats ':' as special so we must escape
    92				// that too.
    93				return c == '@' || c == '/' || c == '?' || c == ':'
    94	
    95			case encodeQueryComponent: // §3.4
    96				// The RFC reserves (so we must escape) everything.
    97				return true
    98	
    99			case encodeFragment: // §4.1
   100				// The RFC text is silent but the grammar allows
   101				// everything, so escape nothing.
   102				return false
   103			}
   104		}
   105	
   106		// Everything else must be escaped.
   107		return true
   108	}
   109	
   110	// QueryUnescape does the inverse transformation of QueryEscape, converting
   111	// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
   112	// any % is not followed by two hexadecimal digits.
   113	func QueryUnescape(s string) (string, error) {
   114		return unescape(s, encodeQueryComponent)
   115	}
   116	
   117	// unescape unescapes a string; the mode specifies
   118	// which section of the URL string is being unescaped.
   119	func unescape(s string, mode encoding) (string, error) {
   120		// Count %, check that they're well-formed.
   121		n := 0
   122		hasPlus := false
   123		for i := 0; i < len(s); {
   124			switch s[i] {
   125			case '%':
   126				n++
   127				if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   128					s = s[i:]
   129					if len(s) > 3 {
   130						s = s[0:3]
   131					}
   132					return "", EscapeError(s)
   133				}
   134				i += 3
   135			case '+':
   136				hasPlus = mode == encodeQueryComponent
   137				i++
   138			default:
   139				i++
   140			}
   141		}
   142	
   143		if n == 0 && !hasPlus {
   144			return s, nil
   145		}
   146	
   147		t := make([]byte, len(s)-2*n)
   148		j := 0
   149		for i := 0; i < len(s); {
   150			switch s[i] {
   151			case '%':
   152				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   153				j++
   154				i += 3
   155			case '+':
   156				if mode == encodeQueryComponent {
   157					t[j] = ' '
   158				} else {
   159					t[j] = '+'
   160				}
   161				j++
   162				i++
   163			default:
   164				t[j] = s[i]
   165				j++
   166				i++
   167			}
   168		}
   169		return string(t), nil
   170	}
   171	
   172	// QueryEscape escapes the string so it can be safely placed
   173	// inside a URL query.
   174	func QueryEscape(s string) string {
   175		return escape(s, encodeQueryComponent)
   176	}
   177	
   178	func escape(s string, mode encoding) string {
   179		spaceCount, hexCount := 0, 0
   180		for i := 0; i < len(s); i++ {
   181			c := s[i]
   182			if shouldEscape(c, mode) {
   183				if c == ' ' && mode == encodeQueryComponent {
   184					spaceCount++
   185				} else {
   186					hexCount++
   187				}
   188			}
   189		}
   190	
   191		if spaceCount == 0 && hexCount == 0 {
   192			return s
   193		}
   194	
   195		t := make([]byte, len(s)+2*hexCount)
   196		j := 0
   197		for i := 0; i < len(s); i++ {
   198			switch c := s[i]; {
   199			case c == ' ' && mode == encodeQueryComponent:
   200				t[j] = '+'
   201				j++
   202			case shouldEscape(c, mode):
   203				t[j] = '%'
   204				t[j+1] = "0123456789ABCDEF"[c>>4]
   205				t[j+2] = "0123456789ABCDEF"[c&15]
   206				j += 3
   207			default:
   208				t[j] = s[i]
   209				j++
   210			}
   211		}
   212		return string(t)
   213	}
   214	
   215	// A URL represents a parsed URL (technically, a URI reference).
   216	// The general form represented is:
   217	//
   218	//	scheme://[userinfo@]host/path[?query][#fragment]
   219	//
   220	// URLs that do not start with a slash after the scheme are interpreted as:
   221	//
   222	//	scheme:opaque[?query][#fragment]
   223	//
   224	// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
   225	// A consequence is that it is impossible to tell which slashes in the Path were
   226	// slashes in the raw URL and which were %2f. This distinction is rarely important,
   227	// but when it is a client must use other routines to parse the raw URL or construct
   228	// the parsed URL. For example, an HTTP server can consult req.RequestURI, and
   229	// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
   230	// instead of URL{Host: "example.com", Path: "/Go/"}.
   231	type URL struct {
   232		Scheme   string
   233		Opaque   string    // encoded opaque data
   234		User     *Userinfo // username and password information
   235		Host     string    // host or host:port
   236		Path     string
   237		RawQuery string // encoded query values, without '?'
   238		Fragment string // fragment for references, without '#'
   239	}
   240	
   241	// User returns a Userinfo containing the provided username
   242	// and no password set.
   243	func User(username string) *Userinfo {
   244		return &Userinfo{username, "", false}
   245	}
   246	
   247	// UserPassword returns a Userinfo containing the provided username
   248	// and password.
   249	// This functionality should only be used with legacy web sites.
   250	// RFC 2396 warns that interpreting Userinfo this way
   251	// ``is NOT RECOMMENDED, because the passing of authentication
   252	// information in clear text (such as URI) has proven to be a
   253	// security risk in almost every case where it has been used.''
   254	func UserPassword(username, password string) *Userinfo {
   255		return &Userinfo{username, password, true}
   256	}
   257	
   258	// The Userinfo type is an immutable encapsulation of username and
   259	// password details for a URL. An existing Userinfo value is guaranteed
   260	// to have a username set (potentially empty, as allowed by RFC 2396),
   261	// and optionally a password.
   262	type Userinfo struct {
   263		username    string
   264		password    string
   265		passwordSet bool
   266	}
   267	
   268	// Username returns the username.
   269	func (u *Userinfo) Username() string {
   270		return u.username
   271	}
   272	
   273	// Password returns the password in case it is set, and whether it is set.
   274	func (u *Userinfo) Password() (string, bool) {
   275		if u.passwordSet {
   276			return u.password, true
   277		}
   278		return "", false
   279	}
   280	
   281	// String returns the encoded userinfo information in the standard form
   282	// of "username[:password]".
   283	func (u *Userinfo) String() string {
   284		s := escape(u.username, encodeUserPassword)
   285		if u.passwordSet {
   286			s += ":" + escape(u.password, encodeUserPassword)
   287		}
   288		return s
   289	}
   290	
   291	// Maybe rawurl is of the form scheme:path.
   292	// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
   293	// If so, return scheme, path; else return "", rawurl.
   294	func getscheme(rawurl string) (scheme, path string, err error) {
   295		for i := 0; i < len(rawurl); i++ {
   296			c := rawurl[i]
   297			switch {
   298			case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
   299			// do nothing
   300			case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
   301				if i == 0 {
   302					return "", rawurl, nil
   303				}
   304			case c == ':':
   305				if i == 0 {
   306					return "", "", errors.New("missing protocol scheme")
   307				}
   308				return rawurl[0:i], rawurl[i+1:], nil
   309			default:
   310				// we have encountered an invalid character,
   311				// so there is no valid scheme
   312				return "", rawurl, nil
   313			}
   314		}
   315		return "", rawurl, nil
   316	}
   317	
   318	// Maybe s is of the form t c u.
   319	// If so, return t, c u (or t, u if cutc == true).
   320	// If not, return s, "".
   321	func split(s string, c string, cutc bool) (string, string) {
   322		i := strings.Index(s, c)
   323		if i < 0 {
   324			return s, ""
   325		}
   326		if cutc {
   327			return s[0:i], s[i+len(c):]
   328		}
   329		return s[0:i], s[i:]
   330	}
   331	
   332	// Parse parses rawurl into a URL structure.
   333	// The rawurl may be relative or absolute.
   334	func Parse(rawurl string) (url *URL, err error) {
   335		// Cut off #frag
   336		u, frag := split(rawurl, "#", true)
   337		if url, err = parse(u, false); err != nil {
   338			return nil, err
   339		}
   340		if frag == "" {
   341			return url, nil
   342		}
   343		if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
   344			return nil, &Error{"parse", rawurl, err}
   345		}
   346		return url, nil
   347	}
   348	
   349	// ParseRequestURI parses rawurl into a URL structure.  It assumes that
   350	// rawurl was received in an HTTP request, so the rawurl is interpreted
   351	// only as an absolute URI or an absolute path.
   352	// The string rawurl is assumed not to have a #fragment suffix.
   353	// (Web browsers strip #fragment before sending the URL to a web server.)
   354	func ParseRequestURI(rawurl string) (url *URL, err error) {
   355		return parse(rawurl, true)
   356	}
   357	
   358	// parse parses a URL from a string in one of two contexts.  If
   359	// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
   360	// in which case only absolute URLs or path-absolute relative URLs are allowed.
   361	// If viaRequest is false, all forms of relative URLs are allowed.
   362	func parse(rawurl string, viaRequest bool) (url *URL, err error) {
   363		var rest string
   364	
   365		if rawurl == "" && viaRequest {
   366			err = errors.New("empty url")
   367			goto Error
   368		}
   369		url = new(URL)
   370	
   371		if rawurl == "*" {
   372			url.Path = "*"
   373			return
   374		}
   375	
   376		// Split off possible leading "http:", "mailto:", etc.
   377		// Cannot contain escaped characters.
   378		if url.Scheme, rest, err = getscheme(rawurl); err != nil {
   379			goto Error
   380		}
   381		url.Scheme = strings.ToLower(url.Scheme)
   382	
   383		rest, url.RawQuery = split(rest, "?", true)
   384	
   385		if !strings.HasPrefix(rest, "/") {
   386			if url.Scheme != "" {
   387				// We consider rootless paths per RFC 3986 as opaque.
   388				url.Opaque = rest
   389				return url, nil
   390			}
   391			if viaRequest {
   392				err = errors.New("invalid URI for request")
   393				goto Error
   394			}
   395		}
   396	
   397		if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
   398			var authority string
   399			authority, rest = split(rest[2:], "/", false)
   400			url.User, url.Host, err = parseAuthority(authority)
   401			if err != nil {
   402				goto Error
   403			}
   404			if strings.Contains(url.Host, "%") {
   405				err = errors.New("hexadecimal escape in host")
   406				goto Error
   407			}
   408		}
   409		if url.Path, err = unescape(rest, encodePath); err != nil {
   410			goto Error
   411		}
   412		return url, nil
   413	
   414	Error:
   415		return nil, &Error{"parse", rawurl, err}
   416	}
   417	
   418	func parseAuthority(authority string) (user *Userinfo, host string, err error) {
   419		i := strings.LastIndex(authority, "@")
   420		if i < 0 {
   421			host = authority
   422			return
   423		}
   424		userinfo, host := authority[:i], authority[i+1:]
   425		if strings.Index(userinfo, ":") < 0 {
   426			if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
   427				return
   428			}
   429			user = User(userinfo)
   430		} else {
   431			username, password := split(userinfo, ":", true)
   432			if username, err = unescape(username, encodeUserPassword); err != nil {
   433				return
   434			}
   435			if password, err = unescape(password, encodeUserPassword); err != nil {
   436				return
   437			}
   438			user = UserPassword(username, password)
   439		}
   440		return
   441	}
   442	
   443	// String reassembles the URL into a valid URL string.
   444	// The general form of the result is one of:
   445	//
   446	//	scheme:opaque
   447	//	scheme://userinfo@host/path?query#fragment
   448	//
   449	// If u.Opaque is non-empty, String uses the first form;
   450	// otherwise it uses the second form.
   451	//
   452	// In the second form, the following rules apply:
   453	//	- if u.Scheme is empty, scheme: is omitted.
   454	//	- if u.User is nil, userinfo@ is omitted.
   455	//	- if u.Host is empty, host/ is omitted.
   456	//	- if u.Scheme and u.Host are empty and u.User is nil,
   457	//	   the entire scheme://userinfo@host/ is omitted.
   458	//	- if u.Host is non-empty and u.Path begins with a /,
   459	//	   the form host/path does not add its own /.
   460	//	- if u.RawQuery is empty, ?query is omitted.
   461	//	- if u.Fragment is empty, #fragment is omitted.
   462	func (u *URL) String() string {
   463		var buf bytes.Buffer
   464		if u.Scheme != "" {
   465			buf.WriteString(u.Scheme)
   466			buf.WriteByte(':')
   467		}
   468		if u.Opaque != "" {
   469			buf.WriteString(u.Opaque)
   470		} else {
   471			if u.Scheme != "" || u.Host != "" || u.User != nil {
   472				buf.WriteString("//")
   473				if ui := u.User; ui != nil {
   474					buf.WriteString(ui.String())
   475					buf.WriteByte('@')
   476				}
   477				if h := u.Host; h != "" {
   478					buf.WriteString(h)
   479				}
   480			}
   481			if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
   482				buf.WriteByte('/')
   483			}
   484			buf.WriteString(escape(u.Path, encodePath))
   485		}
   486		if u.RawQuery != "" {
   487			buf.WriteByte('?')
   488			buf.WriteString(u.RawQuery)
   489		}
   490		if u.Fragment != "" {
   491			buf.WriteByte('#')
   492			buf.WriteString(escape(u.Fragment, encodeFragment))
   493		}
   494		return buf.String()
   495	}
   496	
   497	// Values maps a string key to a list of values.
   498	// It is typically used for query parameters and form values.
   499	// Unlike in the http.Header map, the keys in a Values map
   500	// are case-sensitive.
   501	type Values map[string][]string
   502	
   503	// Get gets the first value associated with the given key.
   504	// If there are no values associated with the key, Get returns
   505	// the empty string. To access multiple values, use the map
   506	// directly.
   507	func (v Values) Get(key string) string {
   508		if v == nil {
   509			return ""
   510		}
   511		vs, ok := v[key]
   512		if !ok || len(vs) == 0 {
   513			return ""
   514		}
   515		return vs[0]
   516	}
   517	
   518	// Set sets the key to value. It replaces any existing
   519	// values.
   520	func (v Values) Set(key, value string) {
   521		v[key] = []string{value}
   522	}
   523	
   524	// Add adds the value to key. It appends to any existing
   525	// values associated with key.
   526	func (v Values) Add(key, value string) {
   527		v[key] = append(v[key], value)
   528	}
   529	
   530	// Del deletes the values associated with key.
   531	func (v Values) Del(key string) {
   532		delete(v, key)
   533	}
   534	
   535	// ParseQuery parses the URL-encoded query string and returns
   536	// a map listing the values specified for each key.
   537	// ParseQuery always returns a non-nil map containing all the
   538	// valid query parameters found; err describes the first decoding error
   539	// encountered, if any.
   540	func ParseQuery(query string) (m Values, err error) {
   541		m = make(Values)
   542		err = parseQuery(m, query)
   543		return
   544	}
   545	
   546	func parseQuery(m Values, query string) (err error) {
   547		for query != "" {
   548			key := query
   549			if i := strings.IndexAny(key, "&;"); i >= 0 {
   550				key, query = key[:i], key[i+1:]
   551			} else {
   552				query = ""
   553			}
   554			if key == "" {
   555				continue
   556			}
   557			value := ""
   558			if i := strings.Index(key, "="); i >= 0 {
   559				key, value = key[:i], key[i+1:]
   560			}
   561			key, err1 := QueryUnescape(key)
   562			if err1 != nil {
   563				if err == nil {
   564					err = err1
   565				}
   566				continue
   567			}
   568			value, err1 = QueryUnescape(value)
   569			if err1 != nil {
   570				if err == nil {
   571					err = err1
   572				}
   573				continue
   574			}
   575			m[key] = append(m[key], value)
   576		}
   577		return err
   578	}
   579	
   580	// Encode encodes the values into ``URL encoded'' form
   581	// ("bar=baz&foo=quux") sorted by key.
   582	func (v Values) Encode() string {
   583		if v == nil {
   584			return ""
   585		}
   586		var buf bytes.Buffer
   587		keys := make([]string, 0, len(v))
   588		for k := range v {
   589			keys = append(keys, k)
   590		}
   591		sort.Strings(keys)
   592		for _, k := range keys {
   593			vs := v[k]
   594			prefix := QueryEscape(k) + "="
   595			for _, v := range vs {
   596				if buf.Len() > 0 {
   597					buf.WriteByte('&')
   598				}
   599				buf.WriteString(prefix)
   600				buf.WriteString(QueryEscape(v))
   601			}
   602		}
   603		return buf.String()
   604	}
   605	
   606	// resolvePath applies special path segments from refs and applies
   607	// them to base, per RFC 3986.
   608	func resolvePath(base, ref string) string {
   609		var full string
   610		if ref == "" {
   611			full = base
   612		} else if ref[0] != '/' {
   613			i := strings.LastIndex(base, "/")
   614			full = base[:i+1] + ref
   615		} else {
   616			full = ref
   617		}
   618		if full == "" {
   619			return ""
   620		}
   621		var dst []string
   622		src := strings.Split(full, "/")
   623		for _, elem := range src {
   624			switch elem {
   625			case ".":
   626				// drop
   627			case "..":
   628				if len(dst) > 0 {
   629					dst = dst[:len(dst)-1]
   630				}
   631			default:
   632				dst = append(dst, elem)
   633			}
   634		}
   635		if last := src[len(src)-1]; last == "." || last == ".." {
   636			// Add final slash to the joined path.
   637			dst = append(dst, "")
   638		}
   639		return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/")
   640	}
   641	
   642	// IsAbs returns true if the URL is absolute.
   643	func (u *URL) IsAbs() bool {
   644		return u.Scheme != ""
   645	}
   646	
   647	// Parse parses a URL in the context of the receiver.  The provided URL
   648	// may be relative or absolute.  Parse returns nil, err on parse
   649	// failure, otherwise its return value is the same as ResolveReference.
   650	func (u *URL) Parse(ref string) (*URL, error) {
   651		refurl, err := Parse(ref)
   652		if err != nil {
   653			return nil, err
   654		}
   655		return u.ResolveReference(refurl), nil
   656	}
   657	
   658	// ResolveReference resolves a URI reference to an absolute URI from
   659	// an absolute base URI, per RFC 3986 Section 5.2.  The URI reference
   660	// may be relative or absolute.  ResolveReference always returns a new
   661	// URL instance, even if the returned URL is identical to either the
   662	// base or reference. If ref is an absolute URL, then ResolveReference
   663	// ignores base and returns a copy of ref.
   664	func (u *URL) ResolveReference(ref *URL) *URL {
   665		url := *ref
   666		if ref.Scheme == "" {
   667			url.Scheme = u.Scheme
   668		}
   669		if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
   670			// The "absoluteURI" or "net_path" cases.
   671			url.Path = resolvePath(ref.Path, "")
   672			return &url
   673		}
   674		if ref.Opaque != "" {
   675			url.User = nil
   676			url.Host = ""
   677			url.Path = ""
   678			return &url
   679		}
   680		if ref.Path == "" {
   681			if ref.RawQuery == "" {
   682				url.RawQuery = u.RawQuery
   683				if ref.Fragment == "" {
   684					url.Fragment = u.Fragment
   685				}
   686			}
   687		}
   688		// The "abs_path" or "rel_path" cases.
   689		url.Host = u.Host
   690		url.User = u.User
   691		url.Path = resolvePath(u.Path, ref.Path)
   692		return &url
   693	}
   694	
   695	// Query parses RawQuery and returns the corresponding values.
   696	func (u *URL) Query() Values {
   697		v, _ := ParseQuery(u.RawQuery)
   698		return v
   699	}
   700	
   701	// RequestURI returns the encoded path?query or opaque?query
   702	// string that would be used in an HTTP request for u.
   703	func (u *URL) RequestURI() string {
   704		result := u.Opaque
   705		if result == "" {
   706			result = escape(u.Path, encodePath)
   707			if result == "" {
   708				result = "/"
   709			}
   710		} else {
   711			if strings.HasPrefix(result, "//") {
   712				result = u.Scheme + ":" + result
   713			}
   714		}
   715		if u.RawQuery != "" {
   716			result += "?" + u.RawQuery
   717		}
   718		return result
   719	}
   720	

View as plain text