...
Run Format

Source file src/html/template/html.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"strings"
    11		"unicode/utf8"
    12	)
    13	
    14	// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
    15	func htmlNospaceEscaper(args ...interface{}) string {
    16		s, t := stringify(args...)
    17		if t == contentTypeHTML {
    18			return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
    19		}
    20		return htmlReplacer(s, htmlNospaceReplacementTable, false)
    21	}
    22	
    23	// attrEscaper escapes for inclusion in quoted attribute values.
    24	func attrEscaper(args ...interface{}) string {
    25		s, t := stringify(args...)
    26		if t == contentTypeHTML {
    27			return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
    28		}
    29		return htmlReplacer(s, htmlReplacementTable, true)
    30	}
    31	
    32	// rcdataEscaper escapes for inclusion in an RCDATA element body.
    33	func rcdataEscaper(args ...interface{}) string {
    34		s, t := stringify(args...)
    35		if t == contentTypeHTML {
    36			return htmlReplacer(s, htmlNormReplacementTable, true)
    37		}
    38		return htmlReplacer(s, htmlReplacementTable, true)
    39	}
    40	
    41	// htmlEscaper escapes for inclusion in HTML text.
    42	func htmlEscaper(args ...interface{}) string {
    43		s, t := stringify(args...)
    44		if t == contentTypeHTML {
    45			return s
    46		}
    47		return htmlReplacer(s, htmlReplacementTable, true)
    48	}
    49	
    50	// htmlReplacementTable contains the runes that need to be escaped
    51	// inside a quoted attribute value or in a text node.
    52	var htmlReplacementTable = []string{
    53		// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
    54		// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
    55		// CHARACTER character to the current attribute's value.
    56		// "
    57		// and similarly
    58		// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
    59		0:    "\uFFFD",
    60		'"':  """,
    61		'&':  "&",
    62		'\'': "'",
    63		'+':  "+",
    64		'<':  "&lt;",
    65		'>':  "&gt;",
    66	}
    67	
    68	// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
    69	// avoid over-encoding existing entities.
    70	var htmlNormReplacementTable = []string{
    71		0:    "\uFFFD",
    72		'"':  "&#34;",
    73		'\'': "&#39;",
    74		'+':  "&#43;",
    75		'<':  "&lt;",
    76		'>':  "&gt;",
    77	}
    78	
    79	// htmlNospaceReplacementTable contains the runes that need to be escaped
    80	// inside an unquoted attribute value.
    81	// The set of runes escaped is the union of the HTML specials and
    82	// those determined by running the JS below in browsers:
    83	// <div id=d></div>
    84	// <script>(function () {
    85	// var a = [], d = document.getElementById("d"), i, c, s;
    86	// for (i = 0; i < 0x10000; ++i) {
    87	//   c = String.fromCharCode(i);
    88	//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
    89	//   s = d.getElementsByTagName("SPAN")[0];
    90	//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
    91	// }
    92	// document.write(a.join(", "));
    93	// })()</script>
    94	var htmlNospaceReplacementTable = []string{
    95		0:    "&#xfffd;",
    96		'\t': "&#9;",
    97		'\n': "&#10;",
    98		'\v': "&#11;",
    99		'\f': "&#12;",
   100		'\r': "&#13;",
   101		' ':  "&#32;",
   102		'"':  "&#34;",
   103		'&':  "&amp;",
   104		'\'': "&#39;",
   105		'+':  "&#43;",
   106		'<':  "&lt;",
   107		'=':  "&#61;",
   108		'>':  "&gt;",
   109		// A parse error in the attribute value (unquoted) and
   110		// before attribute value states.
   111		// Treated as a quoting character by IE.
   112		'`': "&#96;",
   113	}
   114	
   115	// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
   116	// without '&' to avoid over-encoding existing entities.
   117	var htmlNospaceNormReplacementTable = []string{
   118		0:    "&#xfffd;",
   119		'\t': "&#9;",
   120		'\n': "&#10;",
   121		'\v': "&#11;",
   122		'\f': "&#12;",
   123		'\r': "&#13;",
   124		' ':  "&#32;",
   125		'"':  "&#34;",
   126		'\'': "&#39;",
   127		'+':  "&#43;",
   128		'<':  "&lt;",
   129		'=':  "&#61;",
   130		'>':  "&gt;",
   131		// A parse error in the attribute value (unquoted) and
   132		// before attribute value states.
   133		// Treated as a quoting character by IE.
   134		'`': "&#96;",
   135	}
   136	
   137	// htmlReplacer returns s with runes replaced according to replacementTable
   138	// and when badRunes is true, certain bad runes are allowed through unescaped.
   139	func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
   140		written, b := 0, new(bytes.Buffer)
   141		r, w := rune(0), 0
   142		for i := 0; i < len(s); i += w {
   143			// Cannot use 'for range s' because we need to preserve the width
   144			// of the runes in the input. If we see a decoding error, the input
   145			// width will not be utf8.Runelen(r) and we will overrun the buffer.
   146			r, w = utf8.DecodeRuneInString(s[i:])
   147			if int(r) < len(replacementTable) {
   148				if repl := replacementTable[r]; len(repl) != 0 {
   149					b.WriteString(s[written:i])
   150					b.WriteString(repl)
   151					written = i + w
   152				}
   153			} else if badRunes {
   154				// No-op.
   155				// IE does not allow these ranges in unquoted attrs.
   156			} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
   157				fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
   158				written = i + w
   159			}
   160		}
   161		if written == 0 {
   162			return s
   163		}
   164		b.WriteString(s[written:])
   165		return b.String()
   166	}
   167	
   168	// stripTags takes a snippet of HTML and returns only the text content.
   169	// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
   170	func stripTags(html string) string {
   171		var b bytes.Buffer
   172		s, c, i, allText := []byte(html), context{}, 0, true
   173		// Using the transition funcs helps us avoid mangling
   174		// `<div title="1>2">` or `I <3 Ponies!`.
   175		for i != len(s) {
   176			if c.delim == delimNone {
   177				st := c.state
   178				// Use RCDATA instead of parsing into JS or CSS styles.
   179				if c.element != elementNone && !isInTag(st) {
   180					st = stateRCDATA
   181				}
   182				d, nread := transitionFunc[st](c, s[i:])
   183				i1 := i + nread
   184				if c.state == stateText || c.state == stateRCDATA {
   185					// Emit text up to the start of the tag or comment.
   186					j := i1
   187					if d.state != c.state {
   188						for j1 := j - 1; j1 >= i; j1-- {
   189							if s[j1] == '<' {
   190								j = j1
   191								break
   192							}
   193						}
   194					}
   195					b.Write(s[i:j])
   196				} else {
   197					allText = false
   198				}
   199				c, i = d, i1
   200				continue
   201			}
   202			i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
   203			if i1 < i {
   204				break
   205			}
   206			if c.delim != delimSpaceOrTagEnd {
   207				// Consume any quote.
   208				i1++
   209			}
   210			c, i = context{state: stateTag, element: c.element}, i1
   211		}
   212		if allText {
   213			return html
   214		} else if c.state == stateText || c.state == stateRCDATA {
   215			b.Write(s[i:])
   216		}
   217		return b.String()
   218	}
   219	
   220	// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
   221	// a known-safe HTML attribute.
   222	func htmlNameFilter(args ...interface{}) string {
   223		s, t := stringify(args...)
   224		if t == contentTypeHTMLAttr {
   225			return s
   226		}
   227		if len(s) == 0 {
   228			// Avoid violation of structure preservation.
   229			// <input checked {{.K}}={{.V}}>.
   230			// Without this, if .K is empty then .V is the value of
   231			// checked, but otherwise .V is the value of the attribute
   232			// named .K.
   233			return filterFailsafe
   234		}
   235		s = strings.ToLower(s)
   236		if t := attrType(s); t != contentTypePlain {
   237			// TODO: Split attr and element name part filters so we can whitelist
   238			// attributes.
   239			return filterFailsafe
   240		}
   241		for _, r := range s {
   242			switch {
   243			case '0' <= r && r <= '9':
   244			case 'a' <= r && r <= 'z':
   245			default:
   246				return filterFailsafe
   247			}
   248		}
   249		return s
   250	}
   251	
   252	// commentEscaper returns the empty string regardless of input.
   253	// Comment content does not correspond to any parsed structure or
   254	// human-readable content, so the simplest and most secure policy is to drop
   255	// content interpolated into comments.
   256	// This approach is equally valid whether or not static comment content is
   257	// removed from the template.
   258	func commentEscaper(args ...interface{}) string {
   259		return ""
   260	}
   261	

View as plain text