...
Run Format

Source file src/pkg/html/template/transition.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"strings"
    10	)
    11	
    12	// transitionFunc is the array of context transition functions for text nodes,
    13	// indexed by state. A transition function takes a context and template text
    14	// input, and returns the updated context and the number of bytes consumed from
    15	// the front of the input.
      	// Several states share one function; those functions switch on c.state to
      	// tell the sharing states apart where they need to.
    16	var transitionFunc = [...]func(context, []byte) (context, int){
    17		stateText:        tText,
    18		stateTag:         tTag,
    19		stateAttrName:    tAttrName,
    20		stateAfterName:   tAfterName,
    21		stateBeforeValue: tBeforeValue,
    22		stateHTMLCmt:     tHTMLCmt,
    23		stateRCDATA:      tSpecialTagEnd,
    24		stateAttr:        tAttr,
    25		stateURL:         tURL,
    26		stateJS:          tJS,
    27		stateJSDqStr:     tJSDelimited,
    28		stateJSSqStr:     tJSDelimited,
    29		stateJSRegexp:    tJSDelimited,
    30		stateJSBlockCmt:  tBlockCmt,
    31		stateJSLineCmt:   tLineCmt,
    32		stateCSS:         tCSS,
    33		stateCSSDqStr:    tCSSStr,
    34		stateCSSSqStr:    tCSSStr,
    35		stateCSSDqURL:    tCSSStr,
    36		stateCSSSqURL:    tCSSStr,
    37		stateCSSURL:      tCSSStr,
    38		stateCSSBlockCmt: tBlockCmt,
    39		stateCSSLineCmt:  tLineCmt,
    40		stateError:       tError,
    41	}
    42	
    43	var commentStart = []byte("<!--")
    44	var commentEnd = []byte("-->")
    45	
    46	// tText is the context transition function for the text state.
    47	func tText(c context, s []byte) (context, int) {
    48		k := 0
    49		for {
    50			i := k + bytes.IndexByte(s[k:], '<')
    51			if i < k || i+1 == len(s) {
    52				return c, len(s)
    53			} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    54				return context{state: stateHTMLCmt}, i + 4
    55			}
    56			i++
    57			end := false
    58			if s[i] == '/' {
    59				if i+1 == len(s) {
    60					return c, len(s)
    61				}
    62				end, i = true, i+1
    63			}
    64			j, e := eatTagName(s, i)
    65			if j != i {
    66				if end {
    67					e = elementNone
    68				}
    69				// We've found an HTML tag.
    70				return context{state: stateTag, element: e}, j
    71			}
    72			k = j
    73		}
    74	}
    75	
      	// elementContentType maps each element type to the state that tTag
      	// switches to when it sees the '>' that closes the element's tag.
    76	var elementContentType = [...]state{
    77		elementNone:     stateText,
    78		elementScript:   stateJS,
    79		elementStyle:    stateCSS,
    80		elementTextarea: stateRCDATA,
    81		elementTitle:    stateRCDATA,
    82	}
    83	
    84	// tTag is the context transition function for the tag state.
    85	func tTag(c context, s []byte) (context, int) {
    86		// Find the attribute name.
    87		i := eatWhiteSpace(s, 0)
    88		if i == len(s) {
    89			return c, len(s)
    90		}
    91		if s[i] == '>' {
    92			return context{
    93				state:   elementContentType[c.element],
    94				element: c.element,
    95			}, i + 1
    96		}
    97		j, err := eatAttrName(s, i)
    98		if err != nil {
    99			return context{state: stateError, err: err}, len(s)
   100		}
   101		state, attr := stateTag, attrNone
   102		if i == j {
   103			return context{
   104				state: stateError,
   105				err:   errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   106			}, len(s)
   107		}
   108		switch attrType(string(s[i:j])) {
   109		case contentTypeURL:
   110			attr = attrURL
   111		case contentTypeCSS:
   112			attr = attrStyle
   113		case contentTypeJS:
   114			attr = attrScript
   115		}
   116		if j == len(s) {
   117			state = stateAttrName
   118		} else {
   119			state = stateAfterName
   120		}
   121		return context{state: state, element: c.element, attr: attr}, j
   122	}
   123	
   124	// tAttrName is the context transition function for stateAttrName.
   125	func tAttrName(c context, s []byte) (context, int) {
   126		i, err := eatAttrName(s, 0)
   127		if err != nil {
   128			return context{state: stateError, err: err}, len(s)
   129		} else if i != len(s) {
   130			c.state = stateAfterName
   131		}
   132		return c, i
   133	}
   134	
   135	// tAfterName is the context transition function for stateAfterName.
   136	func tAfterName(c context, s []byte) (context, int) {
   137		// Look for the start of the value.
   138		i := eatWhiteSpace(s, 0)
   139		if i == len(s) {
   140			return c, len(s)
   141		} else if s[i] != '=' {
   142			// Occurs due to tag ending '>', and valueless attribute.
   143			c.state = stateTag
   144			return c, i
   145		}
   146		c.state = stateBeforeValue
   147		// Consume the "=".
   148		return c, i + 1
   149	}
   150	
      	// attrStartStates maps each attribute kind to the state that
      	// tBeforeValue enters at the start of that attribute's value.
   151	var attrStartStates = [...]state{
   152		attrNone:   stateAttr,
   153		attrScript: stateJS,
   154		attrStyle:  stateCSS,
   155		attrURL:    stateURL,
   156	}
   157	
   158	// tBeforeValue is the context transition function for stateBeforeValue.
   159	func tBeforeValue(c context, s []byte) (context, int) {
   160		i := eatWhiteSpace(s, 0)
   161		if i == len(s) {
   162			return c, len(s)
   163		}
   164		// Find the attribute delimiter.
   165		delim := delimSpaceOrTagEnd
   166		switch s[i] {
   167		case '\'':
   168			delim, i = delimSingleQuote, i+1
   169		case '"':
   170			delim, i = delimDoubleQuote, i+1
   171		}
   172		c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
   173		return c, i
   174	}
   175	
   176	// tHTMLCmt is the context transition function for stateHTMLCmt.
   177	func tHTMLCmt(c context, s []byte) (context, int) {
   178		if i := bytes.Index(s, commentEnd); i != -1 {
   179			return context{}, i + 3
   180		}
   181		return c, len(s)
   182	}
   183	
   184	// specialTagEndMarkers maps element types to the character sequence that
   185	// case-insensitively signals the end of the special tag body.
      	// Each marker is spelled in lower case and stops before the closing '>'.
   186	var specialTagEndMarkers = [...]string{
   187		elementScript:   "</script",
   188		elementStyle:    "</style",
   189		elementTextarea: "</textarea",
   190		elementTitle:    "</title",
   191	}
   192	
   193	// tSpecialTagEnd is the context transition function for raw text and RCDATA
   194	// element states.
   195	func tSpecialTagEnd(c context, s []byte) (context, int) {
   196		if c.element != elementNone {
   197			if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
   198				return context{}, i
   199			}
   200		}
   201		return c, len(s)
   202	}
   203	
   204	// tAttr is the context transition function for the attribute state.
   205	func tAttr(c context, s []byte) (context, int) {
   206		return c, len(s)
   207	}
   208	
   209	// tURL is the context transition function for the URL state.
   210	func tURL(c context, s []byte) (context, int) {
   211		if bytes.IndexAny(s, "#?") >= 0 {
   212			c.urlPart = urlPartQueryOrFrag
   213		} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   214			// HTML5 uses "Valid URL potentially surrounded by spaces" for
   215			// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
   216			c.urlPart = urlPartPreQuery
   217		}
   218		return c, len(s)
   219	}
   220	
   221	// tJS is the context transition function for the JS state.
   222	func tJS(c context, s []byte) (context, int) {
   223		i := bytes.IndexAny(s, `"'/`)
   224		if i == -1 {
   225			// Entire input is non string, comment, regexp tokens.
   226			c.jsCtx = nextJSCtx(s, c.jsCtx)
   227			return c, len(s)
   228		}
   229		c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   230		switch s[i] {
   231		case '"':
   232			c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   233		case '\'':
   234			c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   235		case '/':
   236			switch {
   237			case i+1 < len(s) && s[i+1] == '/':
   238				c.state, i = stateJSLineCmt, i+1
   239			case i+1 < len(s) && s[i+1] == '*':
   240				c.state, i = stateJSBlockCmt, i+1
   241			case c.jsCtx == jsCtxRegexp:
   242				c.state = stateJSRegexp
   243			case c.jsCtx == jsCtxDivOp:
   244				c.jsCtx = jsCtxRegexp
   245			default:
   246				return context{
   247					state: stateError,
   248					err:   errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   249				}, len(s)
   250			}
   251		default:
   252			panic("unreachable")
   253		}
   254		return c, i + 1
   255	}
   256	
   257	// tJSDelimited is the context transition function for the JS string and regexp
   258	// states.
   259	func tJSDelimited(c context, s []byte) (context, int) {
   260		specials := `\"`
   261		switch c.state {
   262		case stateJSSqStr:
   263			specials = `\'`
   264		case stateJSRegexp:
   265			specials = `\/[]`
   266		}
   267	
   268		k, inCharset := 0, false
   269		for {
   270			i := k + bytes.IndexAny(s[k:], specials)
   271			if i < k {
   272				break
   273			}
   274			switch s[i] {
   275			case '\\':
   276				i++
   277				if i == len(s) {
   278					return context{
   279						state: stateError,
   280						err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
   281					}, len(s)
   282				}
   283			case '[':
   284				inCharset = true
   285			case ']':
   286				inCharset = false
   287			default:
   288				// end delimiter
   289				if !inCharset {
   290					c.state, c.jsCtx = stateJS, jsCtxDivOp
   291					return c, i + 1
   292				}
   293			}
   294			k = i + 1
   295		}
   296	
   297		if inCharset {
   298			// This can be fixed by making context richer if interpolation
   299			// into charsets is desired.
   300			return context{
   301				state: stateError,
   302				err:   errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
   303			}, len(s)
   304		}
   305	
   306		return c, len(s)
   307	}
   308	
   309	var blockCommentEnd = []byte("*/")
   310	
   311	// tBlockCmt is the context transition function for /*comment*/ states.
   312	func tBlockCmt(c context, s []byte) (context, int) {
   313		i := bytes.Index(s, blockCommentEnd)
   314		if i == -1 {
   315			return c, len(s)
   316		}
   317		switch c.state {
   318		case stateJSBlockCmt:
   319			c.state = stateJS
   320		case stateCSSBlockCmt:
   321			c.state = stateCSS
   322		default:
   323			panic(c.state.String())
   324		}
   325		return c, i + 2
   326	}
   327	
   328	// tLineCmt is the context transition function for //comment states.
   329	func tLineCmt(c context, s []byte) (context, int) {
   330		var lineTerminators string
   331		var endState state
   332		switch c.state {
   333		case stateJSLineCmt:
   334			lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   335		case stateCSSLineCmt:
   336			lineTerminators, endState = "\n\f\r", stateCSS
   337			// Line comments are not part of any published CSS standard but
   338			// are supported by the 4 major browsers.
   339			// This defines line comments as
   340			//     LINECOMMENT ::= "//" [^\n\f\d]*
   341			// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   342			// newlines:
   343			//     nl ::= #xA | #xD #xA | #xD | #xC
   344		default:
   345			panic(c.state.String())
   346		}
   347	
   348		i := bytes.IndexAny(s, lineTerminators)
   349		if i == -1 {
   350			return c, len(s)
   351		}
   352		c.state = endState
   353		// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
   354		// "However, the LineTerminator at the end of the line is not
   355		// considered to be part of the single-line comment; it is
   356		// recognized separately by the lexical grammar and becomes part
   357		// of the stream of input elements for the syntactic grammar."
   358		return c, i
   359	}
   360	
   361	// tCSS is the context transition function for the CSS state.
   362	func tCSS(c context, s []byte) (context, int) {
   363		// CSS quoted strings are almost never used except for:
   364		// (1) URLs as in background: "/foo.png"
   365		// (2) Multiword font-names as in font-family: "Times New Roman"
   366		// (3) List separators in content values as in inline-lists:
   367		//    <style>
   368		//    ul.inlineList { list-style: none; padding:0 }
   369		//    ul.inlineList > li { display: inline }
   370		//    ul.inlineList > li:before { content: ", " }
   371		//    ul.inlineList > li:first-child:before { content: "" }
   372		//    </style>
   373		//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   374		// (4) Attribute value selectors as in a[href="http://example.com/"]
   375		//
   376		// We conservatively treat all strings as URLs, but make some
   377		// allowances to avoid confusion.
   378		//
   379		// In (1), our conservative assumption is justified.
   380		// In (2), valid font names do not contain ':', '?', or '#', so our
   381		// conservative assumption is fine since we will never transition past
   382		// urlPartPreQuery.
   383		// In (3), our protocol heuristic should not be tripped, and there
   384		// should not be non-space content after a '?' or '#', so as long as
   385		// we only %-encode RFC 3986 reserved characters we are ok.
   386		// In (4), we should URL escape for URL attributes, and for others we
   387		// have the attribute name available if our conservative assumption
   388		// proves problematic for real code.
   389	
   390		k := 0
   391		for {
   392			i := k + bytes.IndexAny(s[k:], `("'/`)
   393			if i < k {
   394				return c, len(s)
   395			}
   396			switch s[i] {
   397			case '(':
   398				// Look for url to the left.
   399				p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   400				if endsWithCSSKeyword(p, "url") {
   401					j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   402					switch {
   403					case j != len(s) && s[j] == '"':
   404						c.state, j = stateCSSDqURL, j+1
   405					case j != len(s) && s[j] == '\'':
   406						c.state, j = stateCSSSqURL, j+1
   407					default:
   408						c.state = stateCSSURL
   409					}
   410					return c, j
   411				}
   412			case '/':
   413				if i+1 < len(s) {
   414					switch s[i+1] {
   415					case '/':
   416						c.state = stateCSSLineCmt
   417						return c, i + 2
   418					case '*':
   419						c.state = stateCSSBlockCmt
   420						return c, i + 2
   421					}
   422				}
   423			case '"':
   424				c.state = stateCSSDqStr
   425				return c, i + 1
   426			case '\'':
   427				c.state = stateCSSSqStr
   428				return c, i + 1
   429			}
   430			k = i + 1
   431		}
   432	}
   433	
   434	// tCSSStr is the context transition function for the CSS string and URL states.
   435	func tCSSStr(c context, s []byte) (context, int) {
   436		var endAndEsc string
   437		switch c.state {
   438		case stateCSSDqStr, stateCSSDqURL:
   439			endAndEsc = `\"`
   440		case stateCSSSqStr, stateCSSSqURL:
   441			endAndEsc = `\'`
   442		case stateCSSURL:
   443			// Unquoted URLs end with a newline or close parenthesis.
   444			// The below includes the wc (whitespace character) and nl.
   445			endAndEsc = "\\\t\n\f\r )"
   446		default:
   447			panic(c.state.String())
   448		}
   449	
   450		k := 0
   451		for {
   452			i := k + bytes.IndexAny(s[k:], endAndEsc)
   453			if i < k {
   454				c, nread := tURL(c, decodeCSS(s[k:]))
   455				return c, k + nread
   456			}
   457			if s[i] == '\\' {
   458				i++
   459				if i == len(s) {
   460					return context{
   461						state: stateError,
   462						err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
   463					}, len(s)
   464				}
   465			} else {
   466				c.state = stateCSS
   467				return c, i + 1
   468			}
   469			c, _ = tURL(c, decodeCSS(s[:i+1]))
   470			k = i + 1
   471		}
   472	}
   473	
   474	// tError is the context transition function for the error state.
   475	func tError(c context, s []byte) (context, int) {
   476		return c, len(s)
   477	}
   478	
   479	// eatAttrName returns the largest j such that s[i:j] is an attribute name.
   480	// It returns an error if s[i:] does not look like it begins with an
   481	// attribute name, such as encountering a quote mark without a preceding
   482	// equals sign.
   483	func eatAttrName(s []byte, i int) (int, *Error) {
   484		for j := i; j < len(s); j++ {
   485			switch s[j] {
   486			case ' ', '\t', '\n', '\f', '\r', '=', '>':
   487				return j, nil
   488			case '\'', '"', '<':
   489				// These result in a parse warning in HTML5 and are
   490				// indicative of serious problems if seen in an attr
   491				// name in a template.
   492				return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   493			default:
   494				// No-op.
   495			}
   496		}
   497		return len(s), nil
   498	}
   499	
      	// elementNameMap maps lower-case tag names to their element types.
      	// eatTagName lower-cases a name before looking it up here; a name
      	// that is not listed yields the zero element value.
   500	var elementNameMap = map[string]element{
   501		"script":   elementScript,
   502		"style":    elementStyle,
   503		"textarea": elementTextarea,
   504		"title":    elementTitle,
   505	}
   506	
   507	// asciiAlpha reports whether c is an ASCII letter.
   508	func asciiAlpha(c byte) bool {
   509		return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   510	}
   511	
   512	// asciiAlphaNum reports whether c is an ASCII letter or digit.
   513	func asciiAlphaNum(c byte) bool {
   514		return asciiAlpha(c) || '0' <= c && c <= '9'
   515	}
   516	
   517	// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   518	func eatTagName(s []byte, i int) (int, element) {
   519		if i == len(s) || !asciiAlpha(s[i]) {
   520			return i, elementNone
   521		}
   522		j := i + 1
   523		for j < len(s) {
   524			x := s[j]
   525			if asciiAlphaNum(x) {
   526				j++
   527				continue
   528			}
   529			// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   530			if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   531				j += 2
   532				continue
   533			}
   534			break
   535		}
   536		return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   537	}
   538	
   539	// eatWhiteSpace returns the largest j such that s[i:j] is white space.
   540	func eatWhiteSpace(s []byte, i int) int {
   541		for j := i; j < len(s); j++ {
   542			switch s[j] {
   543			case ' ', '\t', '\n', '\f', '\r':
   544				// No-op.
   545			default:
   546				return j
   547			}
   548		}
   549		return len(s)
   550	}
   551	

View as plain text