...
Run Format

Source file src/html/template/transition.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"strings"
    10	)
    11	
// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is indexed by state constant, so the entry for a state is the
// function that handles text seen while in that state. Several states share
// one function (for example, the JS string and regexp states all use
// tJSDelimited); such functions inspect c.state to tell the cases apart.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}
    42	
    43	var commentStart = []byte("<!--")
    44	var commentEnd = []byte("-->")
    45	
    46	// tText is the context transition function for the text state.
    47	func tText(c context, s []byte) (context, int) {
    48		k := 0
    49		for {
    50			i := k + bytes.IndexByte(s[k:], '<')
    51			if i < k || i+1 == len(s) {
    52				return c, len(s)
    53			} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    54				return context{state: stateHTMLCmt}, i + 4
    55			}
    56			i++
    57			end := false
    58			if s[i] == '/' {
    59				if i+1 == len(s) {
    60					return c, len(s)
    61				}
    62				end, i = true, i+1
    63			}
    64			j, e := eatTagName(s, i)
    65			if j != i {
    66				if end {
    67					e = elementNone
    68				}
    69				// We've found an HTML tag.
    70				return context{state: stateTag, element: e}, j
    71			}
    72			k = j
    73		}
    74	}
    75	
// elementContentType maps each element type to the state used for the text
// that follows the tag. tTag indexes it with c.element when it sees the '>'
// that ends a tag.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
    83	
    84	// tTag is the context transition function for the tag state.
    85	func tTag(c context, s []byte) (context, int) {
    86		// Find the attribute name.
    87		i := eatWhiteSpace(s, 0)
    88		if i == len(s) {
    89			return c, len(s)
    90		}
    91		if s[i] == '>' {
    92			return context{
    93				state:   elementContentType[c.element],
    94				element: c.element,
    95			}, i + 1
    96		}
    97		j, err := eatAttrName(s, i)
    98		if err != nil {
    99			return context{state: stateError, err: err}, len(s)
   100		}
   101		state, attr := stateTag, attrNone
   102		if i == j {
   103			return context{
   104				state: stateError,
   105				err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   106			}, len(s)
   107		}
   108		switch attrType(string(s[i:j])) {
   109		case contentTypeURL:
   110			attr = attrURL
   111		case contentTypeCSS:
   112			attr = attrStyle
   113		case contentTypeJS:
   114			attr = attrScript
   115		}
   116		if j == len(s) {
   117			state = stateAttrName
   118		} else {
   119			state = stateAfterName
   120		}
   121		return context{state: state, element: c.element, attr: attr}, j
   122	}
   123	
   124	// tAttrName is the context transition function for stateAttrName.
   125	func tAttrName(c context, s []byte) (context, int) {
   126		i, err := eatAttrName(s, 0)
   127		if err != nil {
   128			return context{state: stateError, err: err}, len(s)
   129		} else if i != len(s) {
   130			c.state = stateAfterName
   131		}
   132		return c, i
   133	}
   134	
   135	// tAfterName is the context transition function for stateAfterName.
   136	func tAfterName(c context, s []byte) (context, int) {
   137		// Look for the start of the value.
   138		i := eatWhiteSpace(s, 0)
   139		if i == len(s) {
   140			return c, len(s)
   141		} else if s[i] != '=' {
   142			// Occurs due to tag ending '>', and valueless attribute.
   143			c.state = stateTag
   144			return c, i
   145		}
   146		c.state = stateBeforeValue
   147		// Consume the "=".
   148		return c, i + 1
   149	}
   150	
// attrStartStates maps an attribute type to the state in which the
// attribute's value is processed. tBeforeValue indexes it with c.attr when
// the value begins.
var attrStartStates = [...]state{
	attrNone:   stateAttr,
	attrScript: stateJS,
	attrStyle:  stateCSS,
	attrURL:    stateURL,
}
   157	
   158	// tBeforeValue is the context transition function for stateBeforeValue.
   159	func tBeforeValue(c context, s []byte) (context, int) {
   160		i := eatWhiteSpace(s, 0)
   161		if i == len(s) {
   162			return c, len(s)
   163		}
   164		// Find the attribute delimiter.
   165		delim := delimSpaceOrTagEnd
   166		switch s[i] {
   167		case '\'':
   168			delim, i = delimSingleQuote, i+1
   169		case '"':
   170			delim, i = delimDoubleQuote, i+1
   171		}
   172		c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
   173		return c, i
   174	}
   175	
   176	// tHTMLCmt is the context transition function for stateHTMLCmt.
   177	func tHTMLCmt(c context, s []byte) (context, int) {
   178		if i := bytes.Index(s, commentEnd); i != -1 {
   179			return context{}, i + 3
   180		}
   181		return c, len(s)
   182	}
   183	
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
//
// tSpecialTagEnd passes the entry for c.element to indexTagEnd. The literal
// has no entry for elementNone; tSpecialTagEnd checks for that element before
// indexing.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
   192	
   193	var (
   194		specialTagEndPrefix = []byte("</")
   195		tagEndSeparators    = []byte("> \t\n\f/")
   196	)
   197	
   198	// tSpecialTagEnd is the context transition function for raw text and RCDATA
   199	// element states.
   200	func tSpecialTagEnd(c context, s []byte) (context, int) {
   201		if c.element != elementNone {
   202			if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   203				return context{}, i
   204			}
   205		}
   206		return c, len(s)
   207	}
   208	
   209	// indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   210	func indexTagEnd(s []byte, tag []byte) int {
   211		res := 0
   212		plen := len(specialTagEndPrefix)
   213		for len(s) > 0 {
   214			// Try to find the tag end prefix first
   215			i := bytes.Index(s, specialTagEndPrefix)
   216			if i == -1 {
   217				return i
   218			}
   219			s = s[i+plen:]
   220			// Try to match the actual tag if there is still space for it
   221			if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   222				s = s[len(tag):]
   223				// Check the tag is followed by a proper separator
   224				if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   225					return res + i
   226				}
   227				res += len(tag)
   228			}
   229			res += i + plen
   230		}
   231		return -1
   232	}
   233	
   234	// tAttr is the context transition function for the attribute state.
   235	func tAttr(c context, s []byte) (context, int) {
   236		return c, len(s)
   237	}
   238	
   239	// tURL is the context transition function for the URL state.
   240	func tURL(c context, s []byte) (context, int) {
   241		if bytes.IndexAny(s, "#?") >= 0 {
   242			c.urlPart = urlPartQueryOrFrag
   243		} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   244			// HTML5 uses "Valid URL potentially surrounded by spaces" for
   245			// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
   246			c.urlPart = urlPartPreQuery
   247		}
   248		return c, len(s)
   249	}
   250	
   251	// tJS is the context transition function for the JS state.
   252	func tJS(c context, s []byte) (context, int) {
   253		i := bytes.IndexAny(s, `"'/`)
   254		if i == -1 {
   255			// Entire input is non string, comment, regexp tokens.
   256			c.jsCtx = nextJSCtx(s, c.jsCtx)
   257			return c, len(s)
   258		}
   259		c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   260		switch s[i] {
   261		case '"':
   262			c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   263		case '\'':
   264			c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   265		case '/':
   266			switch {
   267			case i+1 < len(s) && s[i+1] == '/':
   268				c.state, i = stateJSLineCmt, i+1
   269			case i+1 < len(s) && s[i+1] == '*':
   270				c.state, i = stateJSBlockCmt, i+1
   271			case c.jsCtx == jsCtxRegexp:
   272				c.state = stateJSRegexp
   273			case c.jsCtx == jsCtxDivOp:
   274				c.jsCtx = jsCtxRegexp
   275			default:
   276				return context{
   277					state: stateError,
   278					err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   279				}, len(s)
   280			}
   281		default:
   282			panic("unreachable")
   283		}
   284		return c, i + 1
   285	}
   286	
// tJSDelimited is the context transition function for the JS string and regexp
// states.
//
// It scans for the delimiter that ends the current token: '"' in
// stateJSDqStr, '\'' in stateJSSqStr and '/' in stateJSRegexp. A backslash
// escapes the byte after it, and in a regexp a '/' between '[' and ']' does
// not end the token. On finding the end delimiter it switches to stateJS with
// jsCtxDivOp and consumes through the delimiter. Otherwise it consumes all of
// s and stays in the same state, unless s ends right after a backslash or
// inside a regexp charset, both of which produce stateError.
func tJSDelimited(c context, s []byte) (context, int) {
	// specials is the set of bytes that need attention in the current
	// state; the default applies to double-quoted strings.
	specials := `\"`
	switch c.state {
	case stateJSSqStr:
		specials = `\'`
	case stateJSRegexp:
		specials = `\/[]`
	}

	// k is where the next search starts. inCharset can only become true
	// in stateJSRegexp, the only state whose specials include '['.
	k, inCharset := 0, false
	for {
		i := k + bytes.IndexAny(s[k:], specials)
		if i < k {
			// IndexAny returned -1: no special byte remains.
			break
		}
		switch s[i] {
		case '\\':
			// Step onto the escaped byte so that it is skipped below.
			i++
			if i == len(s) {
				return context{
					state: stateError,
					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
				}, len(s)
			}
		case '[':
			inCharset = true
		case ']':
			inCharset = false
		default:
			// end delimiter
			if !inCharset {
				c.state, c.jsCtx = stateJS, jsCtxDivOp
				return c, i + 1
			}
		}
		// Resume after the byte just handled.
		k = i + 1
	}

	if inCharset {
		// This can be fixed by making context richer if interpolation
		// into charsets is desired.
		return context{
			state: stateError,
			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
		}, len(s)
	}

	return c, len(s)
}
   338	
   339	var blockCommentEnd = []byte("*/")
   340	
   341	// tBlockCmt is the context transition function for /*comment*/ states.
   342	func tBlockCmt(c context, s []byte) (context, int) {
   343		i := bytes.Index(s, blockCommentEnd)
   344		if i == -1 {
   345			return c, len(s)
   346		}
   347		switch c.state {
   348		case stateJSBlockCmt:
   349			c.state = stateJS
   350		case stateCSSBlockCmt:
   351			c.state = stateCSS
   352		default:
   353			panic(c.state.String())
   354		}
   355		return c, i + 2
   356	}
   357	
   358	// tLineCmt is the context transition function for //comment states.
   359	func tLineCmt(c context, s []byte) (context, int) {
   360		var lineTerminators string
   361		var endState state
   362		switch c.state {
   363		case stateJSLineCmt:
   364			lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   365		case stateCSSLineCmt:
   366			lineTerminators, endState = "\n\f\r", stateCSS
   367			// Line comments are not part of any published CSS standard but
   368			// are supported by the 4 major browsers.
   369			// This defines line comments as
   370			//     LINECOMMENT ::= "//" [^\n\f\d]*
   371			// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   372			// newlines:
   373			//     nl ::= #xA | #xD #xA | #xD | #xC
   374		default:
   375			panic(c.state.String())
   376		}
   377	
   378		i := bytes.IndexAny(s, lineTerminators)
   379		if i == -1 {
   380			return c, len(s)
   381		}
   382		c.state = endState
   383		// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
   384		// "However, the LineTerminator at the end of the line is not
   385		// considered to be part of the single-line comment; it is
   386		// recognized separately by the lexical grammar and becomes part
   387		// of the stream of input elements for the syntactic grammar."
   388		return c, i
   389	}
   390	
// tCSS is the context transition function for the CSS state.
//
// It scans s for the first construct that changes the state: a "url(" token
// (optionally followed by a quote), a "//" or "/*" comment opener, or a
// quote that opens a string. If none is found, all of s is consumed and the
// context is returned unchanged.
func tCSS(c context, s []byte) (context, int) {
	// CSS quoted strings are almost never used except for:
	// (1) URLs as in background: "/foo.png"
	// (2) Multiword font-names as in font-family: "Times New Roman"
	// (3) List separators in content values as in inline-lists:
	//    <style>
	//    ul.inlineList { list-style: none; padding:0 }
	//    ul.inlineList > li { display: inline }
	//    ul.inlineList > li:before { content: ", " }
	//    ul.inlineList > li:first-child:before { content: "" }
	//    </style>
	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
	// (4) Attribute value selectors as in a[href="http://example.com/"]
	//
	// We conservatively treat all strings as URLs, but make some
	// allowances to avoid confusion.
	//
	// In (1), our conservative assumption is justified.
	// In (2), valid font names do not contain ':', '?', or '#', so our
	// conservative assumption is fine since we will never transition past
	// urlPartPreQuery.
	// In (3), our protocol heuristic should not be tripped, and there
	// should not be non-space content after a '?' or '#', so as long as
	// we only %-encode RFC 3986 reserved characters we are ok.
	// In (4), we should URL escape for URL attributes, and for others we
	// have the attribute name available if our conservative assumption
	// proves problematic for real code.

	// k is the index at which the next search starts.
	k := 0
	for {
		i := k + bytes.IndexAny(s[k:], `("'/`)
		if i < k {
			// IndexAny returned -1: nothing of interest remains.
			return c, len(s)
		}
		switch s[i] {
		case '(':
			// Look for url to the left.
			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
			if endsWithCSSKeyword(p, "url") {
				// j indexes the first byte after the '(' and any
				// white space following it; it is len(s) when
				// nothing else follows.
				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
				switch {
				case j != len(s) && s[j] == '"':
					c.state, j = stateCSSDqURL, j+1
				case j != len(s) && s[j] == '\'':
					c.state, j = stateCSSSqURL, j+1
				default:
					c.state = stateCSSURL
				}
				return c, j
			}
		case '/':
			// Only "//" and "/*" are significant; any other '/' is
			// skipped.
			if i+1 < len(s) {
				switch s[i+1] {
				case '/':
					c.state = stateCSSLineCmt
					return c, i + 2
				case '*':
					c.state = stateCSSBlockCmt
					return c, i + 2
				}
			}
		case '"':
			c.state = stateCSSDqStr
			return c, i + 1
		case '\'':
			c.state = stateCSSSqStr
			return c, i + 1
		}
		// The byte at i did not change the state; continue after it.
		k = i + 1
	}
}
   463	
// tCSSStr is the context transition function for the CSS string and URL states.
//
// It scans for the byte that ends the string or URL, skipping the byte after
// each backslash. On finding the end it returns to stateCSS and consumes
// through the terminator. The text seen along the way is CSS-decoded and
// passed to tURL, so c.urlPart is kept up to date. An input that ends right
// after a backslash produces stateError. It panics when called in a state
// other than the five CSS string and URL states.
func tCSSStr(c context, s []byte) (context, int) {
	// endAndEsc holds the bytes that either end the token or start an
	// escape sequence in the current state.
	var endAndEsc string
	switch c.state {
	case stateCSSDqStr, stateCSSDqURL:
		endAndEsc = `\"`
	case stateCSSSqStr, stateCSSSqURL:
		endAndEsc = `\'`
	case stateCSSURL:
		// Unquoted URLs end with a newline or close parenthesis.
		// The below includes the wc (whitespace character) and nl.
		endAndEsc = "\\\t\n\f\r )"
	default:
		panic(c.state.String())
	}

	// k is the index at which the next search starts.
	k := 0
	for {
		i := k + bytes.IndexAny(s[k:], endAndEsc)
		if i < k {
			// No terminator or escape remains. Let tURL process the
			// remaining decoded text; the c declared here shadows the
			// parameter and carries tURL's result to the caller.
			c, nread := tURL(c, decodeCSS(s[k:]))
			return c, k + nread
		}
		if s[i] == '\\' {
			// Step onto the byte after the backslash so that it is
			// not mistaken for a terminator.
			i++
			if i == len(s) {
				return context{
					state: stateError,
					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
				}, len(s)
			}
		} else {
			// Found the terminator: back to plain CSS.
			c.state = stateCSS
			return c, i + 1
		}
		// Let tURL update c from the decoded text seen so far; its
		// byte count is not needed here.
		c, _ = tURL(c, decodeCSS(s[:i+1]))
		k = i + 1
	}
}
   503	
   504	// tError is the context transition function for the error state.
   505	func tError(c context, s []byte) (context, int) {
   506		return c, len(s)
   507	}
   508	
   509	// eatAttrName returns the largest j such that s[i:j] is an attribute name.
   510	// It returns an error if s[i:] does not look like it begins with an
   511	// attribute name, such as encountering a quote mark without a preceding
   512	// equals sign.
   513	func eatAttrName(s []byte, i int) (int, *Error) {
   514		for j := i; j < len(s); j++ {
   515			switch s[j] {
   516			case ' ', '\t', '\n', '\f', '\r', '=', '>':
   517				return j, nil
   518			case '\'', '"', '<':
   519				// These result in a parse warning in HTML5 and are
   520				// indicative of serious problems if seen in an attr
   521				// name in a template.
   522				return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   523			default:
   524				// No-op.
   525			}
   526		}
   527		return len(s), nil
   528	}
   529	
// elementNameMap maps lower-case tag names to their element type.
// eatTagName lower-cases a tag name before looking it up here; a name that
// is not in the map yields the zero value of element (presumably
// elementNone; confirm against the declaration of element).
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
   536	
   537	// asciiAlpha reports whether c is an ASCII letter.
   538	func asciiAlpha(c byte) bool {
   539		return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   540	}
   541	
   542	// asciiAlphaNum reports whether c is an ASCII letter or digit.
   543	func asciiAlphaNum(c byte) bool {
   544		return asciiAlpha(c) || '0' <= c && c <= '9'
   545	}
   546	
   547	// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   548	func eatTagName(s []byte, i int) (int, element) {
   549		if i == len(s) || !asciiAlpha(s[i]) {
   550			return i, elementNone
   551		}
   552		j := i + 1
   553		for j < len(s) {
   554			x := s[j]
   555			if asciiAlphaNum(x) {
   556				j++
   557				continue
   558			}
   559			// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   560			if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   561				j += 2
   562				continue
   563			}
   564			break
   565		}
   566		return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   567	}
   568	
   569	// eatWhiteSpace returns the largest j such that s[i:j] is white space.
   570	func eatWhiteSpace(s []byte, i int) int {
   571		for j := i; j < len(s); j++ {
   572			switch s[j] {
   573			case ' ', '\t', '\n', '\f', '\r':
   574				// No-op.
   575			default:
   576				return j
   577			}
   578		}
   579		return len(s)
   580	}
   581	

View as plain text