...
Run Format

Source file src/html/template/transition.go

  // Copyright 2011 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package template
  
  import (
  	"bytes"
  	"strings"
  )
  
  // transitionFunc is the array of context transition functions for text nodes.
  // A transition function takes a context and template text input, and returns
  // the updated context and the number of bytes consumed from the front of the
  // input.
  //
  // The array is indexed by state, so the function for a context c is
  // transitionFunc[c.state]. Several states share one function (for example
  // all JS string/regexp states use tJSDelimited, and both block-comment states
  // use tBlockCmt); those functions inspect c.state to tell the cases apart.
  var transitionFunc = [...]func(context, []byte) (context, int){
  	stateText:        tText,
  	stateTag:         tTag,
  	stateAttrName:    tAttrName,
  	stateAfterName:   tAfterName,
  	stateBeforeValue: tBeforeValue,
  	stateHTMLCmt:     tHTMLCmt,
  	stateRCDATA:      tSpecialTagEnd,
  	stateAttr:        tAttr,
  	stateURL:         tURL,
  	stateJS:          tJS,
  	stateJSDqStr:     tJSDelimited,
  	stateJSSqStr:     tJSDelimited,
  	stateJSRegexp:    tJSDelimited,
  	stateJSBlockCmt:  tBlockCmt,
  	stateJSLineCmt:   tLineCmt,
  	stateCSS:         tCSS,
  	stateCSSDqStr:    tCSSStr,
  	stateCSSSqStr:    tCSSStr,
  	stateCSSDqURL:    tCSSStr,
  	stateCSSSqURL:    tCSSStr,
  	stateCSSURL:      tCSSStr,
  	stateCSSBlockCmt: tBlockCmt,
  	stateCSSLineCmt:  tLineCmt,
  	stateError:       tError,
  }
  
  var commentStart = []byte("<!--")
  var commentEnd = []byte("-->")
  
  // tText is the context transition function for the text state.
  func tText(c context, s []byte) (context, int) {
  	k := 0
  	for {
  		i := k + bytes.IndexByte(s[k:], '<')
  		if i < k || i+1 == len(s) {
  			return c, len(s)
  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
  			return context{state: stateHTMLCmt}, i + 4
  		}
  		i++
  		end := false
  		if s[i] == '/' {
  			if i+1 == len(s) {
  				return c, len(s)
  			}
  			end, i = true, i+1
  		}
  		j, e := eatTagName(s, i)
  		if j != i {
  			if end {
  				e = elementNone
  			}
  			// We've found an HTML tag.
  			return context{state: stateTag, element: e}, j
  		}
  		k = j
  	}
  }
  
  // elementContentType maps an element kind to the state used for the content
  // that follows the element's start tag. tTag consults it when it consumes
  // the '>' that ends a tag.
  var elementContentType = [...]state{
  	elementNone:     stateText,
  	elementScript:   stateJS,
  	elementStyle:    stateCSS,
  	elementTextarea: stateRCDATA,
  	elementTitle:    stateRCDATA,
  }
  
  // tTag is the context transition function for the tag state.
  func tTag(c context, s []byte) (context, int) {
  	// Find the attribute name.
  	i := eatWhiteSpace(s, 0)
  	if i == len(s) {
  		return c, len(s)
  	}
  	if s[i] == '>' {
  		return context{
  			state:   elementContentType[c.element],
  			element: c.element,
  		}, i + 1
  	}
  	j, err := eatAttrName(s, i)
  	if err != nil {
  		return context{state: stateError, err: err}, len(s)
  	}
  	state, attr := stateTag, attrNone
  	if i == j {
  		return context{
  			state: stateError,
  			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
  		}, len(s)
  	}
  
  	attrName := string(s[i:j])
  	if c.element == elementScript && attrName == "type" {
  		attr = attrScriptType
  	} else {
  		switch attrType(attrName) {
  		case contentTypeURL:
  			attr = attrURL
  		case contentTypeCSS:
  			attr = attrStyle
  		case contentTypeJS:
  			attr = attrScript
  		}
  	}
  
  	if j == len(s) {
  		state = stateAttrName
  	} else {
  		state = stateAfterName
  	}
  	return context{state: state, element: c.element, attr: attr}, j
  }
  
  // tAttrName is the context transition function for stateAttrName.
  func tAttrName(c context, s []byte) (context, int) {
  	i, err := eatAttrName(s, 0)
  	if err != nil {
  		return context{state: stateError, err: err}, len(s)
  	} else if i != len(s) {
  		c.state = stateAfterName
  	}
  	return c, i
  }
  
  // tAfterName is the context transition function for stateAfterName.
  func tAfterName(c context, s []byte) (context, int) {
  	// Look for the start of the value.
  	i := eatWhiteSpace(s, 0)
  	if i == len(s) {
  		return c, len(s)
  	} else if s[i] != '=' {
  		// Occurs due to tag ending '>', and valueless attribute.
  		c.state = stateTag
  		return c, i
  	}
  	c.state = stateBeforeValue
  	// Consume the "=".
  	return c, i + 1
  }
  
  // attrStartStates maps an attribute kind to the state in which the
  // attribute's value starts. tBeforeValue consults it once it has located the
  // beginning of the value.
  var attrStartStates = [...]state{
  	attrNone:       stateAttr,
  	attrScript:     stateJS,
  	attrScriptType: stateAttr,
  	attrStyle:      stateCSS,
  	attrURL:        stateURL,
  }
  
  // tBeforeValue is the context transition function for stateBeforeValue.
  func tBeforeValue(c context, s []byte) (context, int) {
  	i := eatWhiteSpace(s, 0)
  	if i == len(s) {
  		return c, len(s)
  	}
  	// Find the attribute delimiter.
  	delim := delimSpaceOrTagEnd
  	switch s[i] {
  	case '\'':
  		delim, i = delimSingleQuote, i+1
  	case '"':
  		delim, i = delimDoubleQuote, i+1
  	}
  	c.state, c.delim = attrStartStates[c.attr], delim
  	return c, i
  }
  
  // tHTMLCmt is the context transition function for stateHTMLCmt.
  func tHTMLCmt(c context, s []byte) (context, int) {
  	if i := bytes.Index(s, commentEnd); i != -1 {
  		return context{}, i + 3
  	}
  	return c, len(s)
  }
  
  // specialTagEndMarkers maps element types to the character sequence that
  // case-insensitively signals the end of the special tag body.
  // The sequences are the bare tag names; the "</" prefix and the trailing
  // separator are matched separately by indexTagEnd.
  var specialTagEndMarkers = [...][]byte{
  	elementScript:   []byte("script"),
  	elementStyle:    []byte("style"),
  	elementTextarea: []byte("textarea"),
  	elementTitle:    []byte("title"),
  }
  
  var (
  	// specialTagEndPrefix is the byte sequence that opens an end tag.
  	specialTagEndPrefix = []byte("</")
  	// tagEndSeparators holds the bytes that indexTagEnd accepts
  	// immediately after the tag name of an end tag.
  	tagEndSeparators    = []byte("> \t\n\f/")
  )
  
  // tSpecialTagEnd is the context transition function for raw text and RCDATA
  // element states.
  func tSpecialTagEnd(c context, s []byte) (context, int) {
  	if c.element != elementNone {
  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
  			return context{}, i
  		}
  	}
  	return c, len(s)
  }
  
  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
  func indexTagEnd(s []byte, tag []byte) int {
  	res := 0
  	plen := len(specialTagEndPrefix)
  	for len(s) > 0 {
  		// Try to find the tag end prefix first
  		i := bytes.Index(s, specialTagEndPrefix)
  		if i == -1 {
  			return i
  		}
  		s = s[i+plen:]
  		// Try to match the actual tag if there is still space for it
  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
  			s = s[len(tag):]
  			// Check the tag is followed by a proper separator
  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
  				return res + i
  			}
  			res += len(tag)
  		}
  		res += i + plen
  	}
  	return -1
  }
  
  // tAttr is the context transition function for the attribute state.
  func tAttr(c context, s []byte) (context, int) {
  	return c, len(s)
  }
  
  // tURL is the context transition function for the URL state.
  func tURL(c context, s []byte) (context, int) {
  	if bytes.IndexAny(s, "#?") >= 0 {
  		c.urlPart = urlPartQueryOrFrag
  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
  		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
  		c.urlPart = urlPartPreQuery
  	}
  	return c, len(s)
  }
  
  // tJS is the context transition function for the JS state.
  func tJS(c context, s []byte) (context, int) {
  	i := bytes.IndexAny(s, `"'/`)
  	if i == -1 {
  		// Entire input is non string, comment, regexp tokens.
  		c.jsCtx = nextJSCtx(s, c.jsCtx)
  		return c, len(s)
  	}
  	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
  	switch s[i] {
  	case '"':
  		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
  	case '\'':
  		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
  	case '/':
  		switch {
  		case i+1 < len(s) && s[i+1] == '/':
  			c.state, i = stateJSLineCmt, i+1
  		case i+1 < len(s) && s[i+1] == '*':
  			c.state, i = stateJSBlockCmt, i+1
  		case c.jsCtx == jsCtxRegexp:
  			c.state = stateJSRegexp
  		case c.jsCtx == jsCtxDivOp:
  			c.jsCtx = jsCtxRegexp
  		default:
  			return context{
  				state: stateError,
  				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
  			}, len(s)
  		}
  	default:
  		panic("unreachable")
  	}
  	return c, i + 1
  }
  
  // tJSDelimited is the context transition function for the JS string and regexp
  // states.
  func tJSDelimited(c context, s []byte) (context, int) {
  	specials := `\"`
  	switch c.state {
  	case stateJSSqStr:
  		specials = `\'`
  	case stateJSRegexp:
  		specials = `\/[]`
  	}
  
  	k, inCharset := 0, false
  	for {
  		i := k + bytes.IndexAny(s[k:], specials)
  		if i < k {
  			break
  		}
  		switch s[i] {
  		case '\\':
  			i++
  			if i == len(s) {
  				return context{
  					state: stateError,
  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
  				}, len(s)
  			}
  		case '[':
  			inCharset = true
  		case ']':
  			inCharset = false
  		default:
  			// end delimiter
  			if !inCharset {
  				c.state, c.jsCtx = stateJS, jsCtxDivOp
  				return c, i + 1
  			}
  		}
  		k = i + 1
  	}
  
  	if inCharset {
  		// This can be fixed by making context richer if interpolation
  		// into charsets is desired.
  		return context{
  			state: stateError,
  			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
  		}, len(s)
  	}
  
  	return c, len(s)
  }
  
  // blockCommentEnd is the byte sequence that terminates a /*block*/ comment.
  var blockCommentEnd = []byte("*/")
  
  // tBlockCmt is the context transition function for /*comment*/ states.
  func tBlockCmt(c context, s []byte) (context, int) {
  	i := bytes.Index(s, blockCommentEnd)
  	if i == -1 {
  		return c, len(s)
  	}
  	switch c.state {
  	case stateJSBlockCmt:
  		c.state = stateJS
  	case stateCSSBlockCmt:
  		c.state = stateCSS
  	default:
  		panic(c.state.String())
  	}
  	return c, i + 2
  }
  
  // tLineCmt is the context transition function for //comment states.
  func tLineCmt(c context, s []byte) (context, int) {
  	var lineTerminators string
  	var endState state
  	switch c.state {
  	case stateJSLineCmt:
  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
  	case stateCSSLineCmt:
  		lineTerminators, endState = "\n\f\r", stateCSS
  		// Line comments are not part of any published CSS standard but
  		// are supported by the 4 major browsers.
  		// This defines line comments as
  		//     LINECOMMENT ::= "//" [^\n\f\d]*
  		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
  		// newlines:
  		//     nl ::= #xA | #xD #xA | #xD | #xC
  	default:
  		panic(c.state.String())
  	}
  
  	i := bytes.IndexAny(s, lineTerminators)
  	if i == -1 {
  		return c, len(s)
  	}
  	c.state = endState
  	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
  	// "However, the LineTerminator at the end of the line is not
  	// considered to be part of the single-line comment; it is
  	// recognized separately by the lexical grammar and becomes part
  	// of the stream of input elements for the syntactic grammar."
  	return c, i
  }
  
  // tCSS is the context transition function for the CSS state.
  func tCSS(c context, s []byte) (context, int) {
  	// CSS quoted strings are almost never used except for:
  	// (1) URLs as in background: "/foo.png"
  	// (2) Multiword font-names as in font-family: "Times New Roman"
  	// (3) List separators in content values as in inline-lists:
  	//    <style>
  	//    ul.inlineList { list-style: none; padding:0 }
  	//    ul.inlineList > li { display: inline }
  	//    ul.inlineList > li:before { content: ", " }
  	//    ul.inlineList > li:first-child:before { content: "" }
  	//    </style>
  	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
  	// (4) Attribute value selectors as in a[href="http://example.com/"]
  	//
  	// We conservatively treat all strings as URLs, but make some
  	// allowances to avoid confusion.
  	//
  	// In (1), our conservative assumption is justified.
  	// In (2), valid font names do not contain ':', '?', or '#', so our
  	// conservative assumption is fine since we will never transition past
  	// urlPartPreQuery.
  	// In (3), our protocol heuristic should not be tripped, and there
  	// should not be non-space content after a '?' or '#', so as long as
  	// we only %-encode RFC 3986 reserved characters we are ok.
  	// In (4), we should URL escape for URL attributes, and for others we
  	// have the attribute name available if our conservative assumption
  	// proves problematic for real code.
  
  	k := 0
  	for {
  		i := k + bytes.IndexAny(s[k:], `("'/`)
  		if i < k {
  			return c, len(s)
  		}
  		switch s[i] {
  		case '(':
  			// Look for url to the left.
  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
  			if endsWithCSSKeyword(p, "url") {
  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
  				switch {
  				case j != len(s) && s[j] == '"':
  					c.state, j = stateCSSDqURL, j+1
  				case j != len(s) && s[j] == '\'':
  					c.state, j = stateCSSSqURL, j+1
  				default:
  					c.state = stateCSSURL
  				}
  				return c, j
  			}
  		case '/':
  			if i+1 < len(s) {
  				switch s[i+1] {
  				case '/':
  					c.state = stateCSSLineCmt
  					return c, i + 2
  				case '*':
  					c.state = stateCSSBlockCmt
  					return c, i + 2
  				}
  			}
  		case '"':
  			c.state = stateCSSDqStr
  			return c, i + 1
  		case '\'':
  			c.state = stateCSSSqStr
  			return c, i + 1
  		}
  		k = i + 1
  	}
  }
  
  // tCSSStr is the context transition function for the CSS string and URL states.
  func tCSSStr(c context, s []byte) (context, int) {
  	var endAndEsc string
  	switch c.state {
  	case stateCSSDqStr, stateCSSDqURL:
  		endAndEsc = `\"`
  	case stateCSSSqStr, stateCSSSqURL:
  		endAndEsc = `\'`
  	case stateCSSURL:
  		// Unquoted URLs end with a newline or close parenthesis.
  		// The below includes the wc (whitespace character) and nl.
  		endAndEsc = "\\\t\n\f\r )"
  	default:
  		panic(c.state.String())
  	}
  
  	k := 0
  	for {
  		i := k + bytes.IndexAny(s[k:], endAndEsc)
  		if i < k {
  			c, nread := tURL(c, decodeCSS(s[k:]))
  			return c, k + nread
  		}
  		if s[i] == '\\' {
  			i++
  			if i == len(s) {
  				return context{
  					state: stateError,
  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
  				}, len(s)
  			}
  		} else {
  			c.state = stateCSS
  			return c, i + 1
  		}
  		c, _ = tURL(c, decodeCSS(s[:i+1]))
  		k = i + 1
  	}
  }
  
  // tError is the context transition function for the error state.
  func tError(c context, s []byte) (context, int) {
  	return c, len(s)
  }
  
  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
  // It returns an error if s[i:] does not look like it begins with an
  // attribute name, such as encountering a quote mark without a preceding
  // equals sign.
  func eatAttrName(s []byte, i int) (int, *Error) {
  	for j := i; j < len(s); j++ {
  		switch s[j] {
  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
  			return j, nil
  		case '\'', '"', '<':
  			// These result in a parse warning in HTML5 and are
  			// indicative of serious problems if seen in an attr
  			// name in a template.
  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
  		default:
  			// No-op.
  		}
  	}
  	return len(s), nil
  }
  
  // elementNameMap maps lower-case tag names to their element kind.
  // eatTagName lower-cases a tag name before looking it up here; names that
  // are absent from the map yield the zero element value.
  var elementNameMap = map[string]element{
  	"script":   elementScript,
  	"style":    elementStyle,
  	"textarea": elementTextarea,
  	"title":    elementTitle,
  }
  
  // asciiAlpha reports whether c is an ASCII letter.
  func asciiAlpha(c byte) bool {
  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
  }
  
  // asciiAlphaNum reports whether c is an ASCII letter or digit.
  func asciiAlphaNum(c byte) bool {
  	return asciiAlpha(c) || '0' <= c && c <= '9'
  }
  
  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
  func eatTagName(s []byte, i int) (int, element) {
  	if i == len(s) || !asciiAlpha(s[i]) {
  		return i, elementNone
  	}
  	j := i + 1
  	for j < len(s) {
  		x := s[j]
  		if asciiAlphaNum(x) {
  			j++
  			continue
  		}
  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
  			j += 2
  			continue
  		}
  		break
  	}
  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
  }
  
  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
  func eatWhiteSpace(s []byte, i int) int {
  	for j := i; j < len(s); j++ {
  		switch s[j] {
  		case ' ', '\t', '\n', '\f', '\r':
  			// No-op.
  		default:
  			return j
  		}
  	}
  	return len(s)
  }
  

View as plain text