
Source file src/html/template/url.go

Documentation: html/template

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package template
  
  import (
  	"bytes"
  	"fmt"
  	"strings"
  )
  
  // urlFilter returns its input unless it contains an unsafe scheme, in which
  // case it defangs the entire URL.
  //
  // Schemes that cause unintended side effects that are irreversible without user
  // interaction are considered unsafe. For example, clicking on a "javascript:"
  // link can immediately trigger JavaScript code execution.
  //
  // This filter conservatively assumes that all schemes other than the following
  // are unsafe:
  //    * http:   Navigates to a new website, and may open a new window or tab.
  //              These side effects can be reversed by navigating back to the
  //              previous website, or closing the window or tab. No irreversible
  //              changes will take place without further user interaction with
  //              the new website.
  //    * https:  Same as http.
  //    * mailto: Opens an email program and starts a new draft. This side effect
  //              is not irreversible until the user explicitly clicks send; it
  //              can be undone by closing the email program.
  //
  // To allow URLs containing other schemes to bypass this filter, developers must
  // explicitly indicate that such a URL is expected and safe by encapsulating it
  // in a template.URL value.
  func urlFilter(args ...interface{}) string {
  	s, t := stringify(args...)
  	if t == contentTypeURL {
  		return s
  	}
  	if !isSafeURL(s) {
  		return "#" + filterFailsafe
  	}
  	return s
  }
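
  // urlFilterIllustration is a hypothetical helper, not part of the original
  // file; it sketches urlFilter's behavior for a few representative inputs,
  // assuming the package's usual failsafe value "ZgotmplZ".
  func urlFilterIllustration() {
  	_ = urlFilter("http://example.com/")     // "http://example.com/": safe scheme, returned as is
  	_ = urlFilter("/relative/path?q=1")      // "/relative/path?q=1": no scheme, returned as is
  	_ = urlFilter("javascript:alert(1)")     // "#ZgotmplZ": unsafe scheme, defanged
  	_ = urlFilter(URL("javascript:void(0)")) // passed through: the URL type marks it as trusted
  }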
  
  // isSafeURL is true if s is a relative URL or if the URL has a protocol in
  // (http, https, mailto).
  func isSafeURL(s string) bool {
  	if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') {
  		protocol := s[:i]
  		if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") {
  			return false
  		}
  	}
  	return true
  }
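
  // isSafeURLIllustration is a hypothetical helper, not part of the original
  // file; it sketches which inputs isSafeURL accepts. A ':' only counts as a
  // scheme separator when no '/' appears before it.
  func isSafeURLIllustration() {
  	_ = isSafeURL("https://example.com/")    // true
  	_ = isSafeURL("MAILTO:user@example.com") // true: the scheme check is case-insensitive
  	_ = isSafeURL("relative/path:colon")     // true: '/' before ':' means there is no scheme
  	_ = isSafeURL("javascript:alert(1)")     // false
  	_ = isSafeURL("data:text/html,x")        // false: anything but http, https, mailto
  }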
  
  // urlEscaper produces an output that can be embedded in a URL query.
  // The output can be embedded in an HTML attribute without further escaping.
  func urlEscaper(args ...interface{}) string {
  	return urlProcessor(false, args...)
  }
  
  // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
  // string or parenthesis delimited url(...).
  // The normalizer does not encode all HTML specials. Specifically, it does not
  // encode '&' so correct embedding in an HTML attribute requires escaping of
  // '&' to '&amp;'.
  func urlNormalizer(args ...interface{}) string {
  	return urlProcessor(true, args...)
  }
  
  // urlProcessor normalizes (when norm is true) or escapes its input to produce
  // a valid hierarchical or opaque URL part.
  func urlProcessor(norm bool, args ...interface{}) string {
  	s, t := stringify(args...)
  	if t == contentTypeURL {
  		norm = true
  	}
  	var b bytes.Buffer
  	if processURLOnto(s, norm, &b) {
  		return b.String()
  	}
  	return s
  }
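
  // urlProcessorIllustration is a hypothetical helper, not part of the
  // original file; it contrasts the escape mode (urlEscaper) with the
  // normalize mode (urlNormalizer) on the same inputs.
  func urlProcessorIllustration() {
  	_ = urlEscaper("a&b=c d")    // "a%26b%3dc%20d": reserved characters are encoded
  	_ = urlNormalizer("a&b=c d") // "a&b=c%20d": reserved characters are kept
  	_ = urlNormalizer("%7Eok")   // "%7Eok": an existing valid escape is preserved
  	_ = urlEscaper("%7Eok")      // "%257Eok": '%' itself is encoded when escaping
  }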
  
  // processURLOnto appends a normalized URL corresponding to its input to b
  // and returns true if the appended content differs from s.
  func processURLOnto(s string, norm bool, b *bytes.Buffer) bool {
  	b.Grow(len(s) + 16)
  	written := 0
  	// The byte loop below assumes that all URLs use UTF-8 as the
  	// content-encoding. This is similar to the URI to IRI encoding scheme
  	// defined in section 3.1 of RFC 3987, and behaves the same as the
  	// EcmaScript builtin encodeURIComponent.
  	// It should not cause any misencoding of URLs in pages with
  	// Content-type: text/html;charset=UTF-8.
  	for i, n := 0, len(s); i < n; i++ {
  		c := s[i]
  		switch c {
  		// Single quote and parens are sub-delims in RFC 3986, but we
  		// escape them so the output can be embedded in single
  		// quoted attributes and unquoted CSS url(...) constructs.
  		// Single quotes are reserved in URLs, but are only used in
  		// the obsolete "mark" rule in an appendix in RFC 3986
  		// so can be safely encoded.
  		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
  			if norm {
  				continue
  			}
  		// Unreserved according to RFC 3986 sec 2.3
  		// "For consistency, percent-encoded octets in the ranges of
  		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
  		// period (%2E), underscore (%5F), or tilde (%7E) should not be
  		// created by URI producers."
  		case '-', '.', '_', '~':
  			continue
  		case '%':
  			// When normalizing do not re-encode valid escapes.
  			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
  				continue
  			}
  		default:
  			// Unreserved according to RFC 3986 sec 2.3
  			if 'a' <= c && c <= 'z' {
  				continue
  			}
  			if 'A' <= c && c <= 'Z' {
  				continue
  			}
  			if '0' <= c && c <= '9' {
  				continue
  			}
  		}
  		b.WriteString(s[written:i])
  		fmt.Fprintf(b, "%%%02x", c)
  		written = i + 1
  	}
  	b.WriteString(s[written:])
  	return written != 0
  }
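
  // processURLOntoIllustration is a hypothetical helper, not part of the
  // original file; it sketches the appending contract: the input is always
  // appended to b, and the boolean reports whether any byte was escaped.
  func processURLOntoIllustration() {
  	var b bytes.Buffer
  	changed := processURLOnto("/img/a b.png", true, &b)
  	_ = changed // true: b now holds "/img/a%20b.png"
  	var b2 bytes.Buffer
  	unchanged := processURLOnto("/img/ok.png", true, &b2)
  	_ = unchanged // false: nothing needed escaping; b2 holds the input verbatim
  }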
  
  // srcsetFilterAndEscaper filters and normalizes srcset values, which are
  // comma-separated URLs followed by metadata.
  func srcsetFilterAndEscaper(args ...interface{}) string {
  	s, t := stringify(args...)
  	switch t {
  	case contentTypeSrcset:
  		return s
  	case contentTypeURL:
  		// Normalizing gets rid of all HTML whitespace
  		// which separates the image URL from its metadata.
  		var b bytes.Buffer
  		if processURLOnto(s, true, &b) {
  			s = b.String()
  		}
  		// Additionally, commas separate one source from another.
  		return strings.Replace(s, ",", "%2c", -1)
  	}
  
  	var b bytes.Buffer
  	written := 0
  	for i := 0; i < len(s); i++ {
  		if s[i] == ',' {
  			filterSrcsetElement(s, written, i, &b)
  			b.WriteString(",")
  			written = i + 1
  		}
  	}
  	filterSrcsetElement(s, written, len(s), &b)
  	return b.String()
  }
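
  // srcsetIllustration is a hypothetical helper, not part of the original
  // file; it sketches how each comma-separated srcset candidate is checked
  // independently, with unsafe candidates replaced by the failsafe.
  func srcsetIllustration() {
  	_ = srcsetFilterAndEscaper("/small.png 1x, /big.png 2x")
  	// unchanged: both URLs are safe and the metadata is only spaces and alnums
  	_ = srcsetFilterAndEscaper("/ok.png 1x, javascript:alert(1) 2x")
  	// "/ok.png 1x,#ZgotmplZ": the unsafe candidate is replaced as a whole
  }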
  
  // Derived from https://play.golang.org/p/Dhmj7FORT5
  const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"
  
  // isHTMLSpace is true iff c is a whitespace character per
  // https://infra.spec.whatwg.org/#ascii-whitespace
  func isHTMLSpace(c byte) bool {
  	return (c <= 0x20) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
  }
  
  func isHTMLSpaceOrASCIIAlnum(c byte) bool {
  	return (c < 0x80) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
  }
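
  // spaceBitmaskIllustration is a hypothetical helper, not part of the
  // original file; it spot-checks the packed 128-bit lookup table above,
  // in which bit c is set for HTML whitespace and ASCII alphanumerics.
  func spaceBitmaskIllustration() {
  	_ = isHTMLSpace(' ')             // true
  	_ = isHTMLSpace('\t')            // true
  	_ = isHTMLSpace('\v')            // false: vertical tab is not HTML whitespace
  	_ = isHTMLSpaceOrASCIIAlnum('x') // true
  	_ = isHTMLSpaceOrASCIIAlnum('%') // false: such metadata defangs the srcset candidate
  }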
  
  func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
  	start := left
  	for start < right && isHTMLSpace(s[start]) {
  		start++
  	}
  	end := right
  	for i := start; i < right; i++ {
  		if isHTMLSpace(s[i]) {
  			end = i
  			break
  		}
  	}
  	if url := s[start:end]; isSafeURL(url) {
  		// If image metadata is only spaces or alnums then
  		// we don't need to URL normalize it.
  		metadataOk := true
  		for i := end; i < right; i++ {
  			if !isHTMLSpaceOrASCIIAlnum(s[i]) {
  				metadataOk = false
  				break
  			}
  		}
  		if metadataOk {
  			b.WriteString(s[left:start])
  			processURLOnto(url, true, b)
  			b.WriteString(s[end:right])
  			return
  		}
  	}
  	b.WriteString("#")
  	b.WriteString(filterFailsafe)
  }
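
  // filterSrcsetElementIllustration is a hypothetical helper, not part of the
  // original file; it shows one candidate kept (URL normalized, surrounding
  // text preserved) and one replaced because its metadata is not plain enough.
  func filterSrcsetElementIllustration() {
  	var b bytes.Buffer
  	filterSrcsetElement(" /a.png 2x", 0, len(" /a.png 2x"), &b) // appends " /a.png 2x"
  	filterSrcsetElement("/a.png 2x;", 0, len("/a.png 2x;"), &b) // appends "#ZgotmplZ": ';' in metadata
  	_ = b.String()
  }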
  
