...
Run Format

Source file src/html/template/js.go

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package template
  
  import (
  	"bytes"
  	"encoding/json"
  	"fmt"
  	"reflect"
  	"strings"
  	"unicode/utf8"
  )
  
  // nextJSCtx returns the context that determines whether a slash after the
  // given run of tokens starts a regular expression instead of a division
  // operator: / or /=.
  //
  // This assumes that the token run does not include any string tokens, comment
  // tokens, regular expression literal tokens, or division operators.
  //
  // This fails on some valid but nonsensical JavaScript programs like
  // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
  // fail on any known useful programs. It is based on the draft
  // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
  // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
  func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
  	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
  	if len(s) == 0 {
  		return preceding
  	}
  
  	// All cases below are in the single-byte UTF-8 group.
  	switch c, n := s[len(s)-1], len(s); c {
  	case '+', '-':
  		// ++ and -- are not regexp preceders, but + and - are whether
  		// they are used as infix or prefix operators.
  		start := n - 1
  		// Count the number of adjacent dashes or pluses.
  		for start > 0 && s[start-1] == c {
  			start--
  		}
  		if (n-start)&1 == 1 {
  			// Reached for trailing minus signs since "---" is the
  			// same as "-- -".
  			return jsCtxRegexp
  		}
  		return jsCtxDivOp
  	case '.':
  		// Handle "42."
  		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
  			return jsCtxDivOp
  		}
  		return jsCtxRegexp
  	// Suffixes for all punctuators from section 7.7 of the language spec
  	// that only end binary operators not handled above.
  	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
  		return jsCtxRegexp
  	// Suffixes for all punctuators from section 7.7 of the language spec
  	// that are prefix operators not handled above.
  	case '!', '~':
  		return jsCtxRegexp
  	// Matches all the punctuators from section 7.7 of the language spec
  	// that are open brackets not handled above.
  	case '(', '[':
  		return jsCtxRegexp
  	// Matches all the punctuators from section 7.7 of the language spec
  	// that precede expression starts.
  	case ':', ';', '{':
  		return jsCtxRegexp
  	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
  	// are handled in the default except for '}' which can precede a
  	// division op as in
  	//    ({ valueOf: function () { return 42 } } / 2
  	// which is valid, but, in practice, developers don't divide object
  	// literals, so our heuristic works well for code like
  	//    function () { ... }  /foo/.test(x) && sideEffect();
  	// The ')' punctuator can precede a regular expression as in
  	//     if (b) /foo/.test(x) && ...
  	// but this is much less likely than
  	//     (a + b) / c
  	case '}':
  		return jsCtxRegexp
  	default:
  		// Look for an IdentifierName and see if it is a keyword that
  		// can precede a regular expression.
  		j := n
  		for j > 0 && isJSIdentPart(rune(s[j-1])) {
  			j--
  		}
  		if regexpPrecederKeywords[string(s[j:])] {
  			return jsCtxRegexp
  		}
  	}
  	// Otherwise is a punctuator not listed above, or
  	// a string which precedes a div op, or an identifier
  	// which precedes a div op.
  	return jsCtxDivOp
  }
  
  // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
  // regular expression in JS source.
  //
  // nextJSCtx looks up the identifier-like suffix of a token run in this set;
  // a hit means that a following '/' is treated as the start of a regular
  // expression literal rather than as a division operator.
  var regexpPrecederKeywords = map[string]bool{
  	"break":      true,
  	"case":       true,
  	"continue":   true,
  	"delete":     true,
  	"do":         true,
  	"else":       true,
  	"finally":    true,
  	"in":         true,
  	"instanceof": true,
  	"return":     true,
  	"throw":      true,
  	"try":        true,
  	"typeof":     true,
  	"void":       true,
  }
  
  var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
  
  // indirectToJSONMarshaler returns the value, after dereferencing as many times
  // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
  func indirectToJSONMarshaler(a interface{}) interface{} {
  	v := reflect.ValueOf(a)
  	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
  		v = v.Elem()
  	}
  	return v.Interface()
  }
  
  // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
  // neither side-effects nor free variables outside (NaN, Infinity).
  func jsValEscaper(args ...interface{}) string {
  	var a interface{}
  	if len(args) == 1 {
  		a = indirectToJSONMarshaler(args[0])
  		switch t := a.(type) {
  		case JS:
  			return string(t)
  		case JSStr:
  			// TODO: normalize quotes.
  			return `"` + string(t) + `"`
  		case json.Marshaler:
  			// Do not treat as a Stringer.
  		case fmt.Stringer:
  			a = t.String()
  		}
  	} else {
  		for i, arg := range args {
  			args[i] = indirectToJSONMarshaler(arg)
  		}
  		a = fmt.Sprint(args...)
  	}
  	// TODO: detect cycles before calling Marshal which loops infinitely on
  	// cyclic data. This may be an unacceptable DoS risk.
  
  	b, err := json.Marshal(a)
  	if err != nil {
  		// Put a space before comment so that if it is flush against
  		// a division operator it is not turned into a line comment:
  		//     x/{{y}}
  		// turning into
  		//     x//* error marshaling y:
  		//          second line of error message */null
  		return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
  	}
  
  	// TODO: maybe post-process output to prevent it from containing
  	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
  	// in case custom marshalers produce output containing those.
  
  	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
  	if len(b) == 0 {
  		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
  		// not cause the output `x=y/*z`.
  		return " null "
  	}
  	first, _ := utf8.DecodeRune(b)
  	last, _ := utf8.DecodeLastRune(b)
  	var buf bytes.Buffer
  	// Prevent IdentifierNames and NumericLiterals from running into
  	// keywords: in, instanceof, typeof, void
  	pad := isJSIdentPart(first) || isJSIdentPart(last)
  	if pad {
  		buf.WriteByte(' ')
  	}
  	written := 0
  	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
  	// so it falls within the subset of JSON which is valid JS.
  	for i := 0; i < len(b); {
  		rune, n := utf8.DecodeRune(b[i:])
  		repl := ""
  		if rune == 0x2028 {
  			repl = `\u2028`
  		} else if rune == 0x2029 {
  			repl = `\u2029`
  		}
  		if repl != "" {
  			buf.Write(b[written:i])
  			buf.WriteString(repl)
  			written = i + n
  		}
  		i += n
  	}
  	if buf.Len() != 0 {
  		buf.Write(b[written:])
  		if pad {
  			buf.WriteByte(' ')
  		}
  		b = buf.Bytes()
  	}
  	return string(b)
  }
  
  // jsStrEscaper produces a string that can be included between quotes in
  // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
  // or in an HTML5 event handler attribute such as onclick.
  func jsStrEscaper(args ...interface{}) string {
  	s, t := stringify(args...)
  	if t == contentTypeJSStr {
  		return replace(s, jsStrNormReplacementTable)
  	}
  	return replace(s, jsStrReplacementTable)
  }
  
  // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
  // specials so the result is treated literally when included in a regular
  // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
  // the literal text of {{.X}} followed by the string "bar".
  func jsRegexpEscaper(args ...interface{}) string {
  	s, _ := stringify(args...)
  	s = replace(s, jsRegexpReplacementTable)
  	if s == "" {
  		// /{{.X}}/ should not produce a line comment when .X == "".
  		return "(?:)"
  	}
  	return s
  }
  
  // replace replaces each rune r of s with replacementTable[r], provided that
  // r < len(replacementTable). If replacementTable[r] is the empty string then
  // no replacement is made.
  // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
  // `\u2029`.
  func replace(s string, replacementTable []string) string {
  	var b bytes.Buffer
  	r, w, written := rune(0), 0, 0
  	for i := 0; i < len(s); i += w {
  		// See comment in htmlEscaper.
  		r, w = utf8.DecodeRuneInString(s[i:])
  		var repl string
  		switch {
  		case int(r) < len(replacementTable) && replacementTable[r] != "":
  			repl = replacementTable[r]
  		case r == '\u2028':
  			repl = `\u2028`
  		case r == '\u2029':
  			repl = `\u2029`
  		default:
  			continue
  		}
  		b.WriteString(s[written:i])
  		b.WriteString(repl)
  		written = i + w
  	}
  	if written == 0 {
  		return s
  	}
  	b.WriteString(s[written:])
  	return b.String()
  }
  
  // jsStrReplacementTable maps a rune, used as the slice index, to the escape
  // sequence that replace substitutes for it. jsStrEscaper uses this table for
  // input whose content type is not contentTypeJSStr. Indices without an
  // explicit entry hold the empty string, which replace treats as "no
  // replacement".
  var jsStrReplacementTable = []string{
  	0:    `\0`,
  	'\t': `\t`,
  	'\n': `\n`,
  	'\v': `\x0b`, // "\v" == "v" on IE 6.
  	'\f': `\f`,
  	'\r': `\r`,
  	// Encode HTML specials as hex so the output can be embedded
  	// in HTML attributes without further encoding.
  	'"':  `\x22`,
  	'&':  `\x26`,
  	'\'': `\x27`,
  	'+':  `\x2b`,
  	'/':  `\/`,
  	'<':  `\x3c`,
  	'>':  `\x3e`,
  	'\\': `\\`,
  }
  
  // jsStrNormReplacementTable is like jsStrReplacementTable but does not
  // overencode existing escapes since this table has no entry for `\`.
  // jsStrEscaper uses it for input whose content type is contentTypeJSStr.
  var jsStrNormReplacementTable = []string{
  	0:    `\0`,
  	'\t': `\t`,
  	'\n': `\n`,
  	'\v': `\x0b`, // "\v" == "v" on IE 6.
  	'\f': `\f`,
  	'\r': `\r`,
  	// Encode HTML specials as hex so the output can be embedded
  	// in HTML attributes without further encoding.
  	'"':  `\x22`,
  	'&':  `\x26`,
  	'\'': `\x27`,
  	'+':  `\x2b`,
  	'/':  `\/`,
  	'<':  `\x3c`,
  	'>':  `\x3e`,
  }
  
  // jsRegexpReplacementTable maps a rune, used as the slice index, to the escape
  // sequence that replace substitutes for it when jsRegexpEscaper escapes text
  // for inclusion in a regular expression literal. In addition to the control
  // characters and HTML specials, it backslash-escapes regular expression
  // metacharacters. Indices without an explicit entry hold the empty string,
  // which replace treats as "no replacement".
  var jsRegexpReplacementTable = []string{
  	0:    `\0`,
  	'\t': `\t`,
  	'\n': `\n`,
  	'\v': `\x0b`, // "\v" == "v" on IE 6.
  	'\f': `\f`,
  	'\r': `\r`,
  	// Encode HTML specials as hex so the output can be embedded
  	// in HTML attributes without further encoding.
  	'"':  `\x22`,
  	'$':  `\$`,
  	'&':  `\x26`,
  	'\'': `\x27`,
  	'(':  `\(`,
  	')':  `\)`,
  	'*':  `\*`,
  	'+':  `\x2b`,
  	'-':  `\-`,
  	'.':  `\.`,
  	'/':  `\/`,
  	'<':  `\x3c`,
  	'>':  `\x3e`,
  	'?':  `\?`,
  	'[':  `\[`,
  	'\\': `\\`,
  	']':  `\]`,
  	'^':  `\^`,
  	'{':  `\{`,
  	'|':  `\|`,
  	'}':  `\}`,
  }
  
  // isJSIdentPart reports whether the given rune is a JS identifier part.
  // It does not handle all the non-Latin letters, joiners, and combining marks,
  // but it does handle every codepoint that can occur in a numeric literal or
  // a keyword.
  func isJSIdentPart(r rune) bool {
  	switch {
  	case r == '$':
  		return true
  	case '0' <= r && r <= '9':
  		return true
  	case 'A' <= r && r <= 'Z':
  		return true
  	case r == '_':
  		return true
  	case 'a' <= r && r <= 'z':
  		return true
  	}
  	return false
  }
  
  // isJSType returns true if the given MIME type should be considered JavaScript.
  //
  // It is used to determine whether a script tag with a type attribute is a javascript container.
  func isJSType(mimeType string) bool {
  	// per
  	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
  	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
  	//   https://tools.ietf.org/html/rfc4329#section-3
  	//   https://www.ietf.org/rfc/rfc4627.txt
  
  	// discard parameters
  	if i := strings.Index(mimeType, ";"); i >= 0 {
  		mimeType = mimeType[:i]
  	}
  	mimeType = strings.TrimSpace(mimeType)
  	switch mimeType {
  	case
  		"application/ecmascript",
  		"application/javascript",
  		"application/json",
  		"application/x-ecmascript",
  		"application/x-javascript",
  		"text/ecmascript",
  		"text/javascript",
  		"text/javascript1.0",
  		"text/javascript1.1",
  		"text/javascript1.2",
  		"text/javascript1.3",
  		"text/javascript1.4",
  		"text/javascript1.5",
  		"text/jscript",
  		"text/livescript",
  		"text/x-ecmascript",
  		"text/x-javascript":
  		return true
  	default:
  		return false
  	}
  }
  

View as plain text