...
Run Format

Source file src/html/template/transition.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"strings"
    10	)
    11	
    12	// transitionFunc is the array of context transition functions for text nodes.
    13	// A transition function takes a context and template text input, and returns
    14	// the updated context and the number of bytes consumed from the front of the
    15	// input.
    16	var transitionFunc = [...]func(context, []byte) (context, int){
    17		stateText:        tText,
    18		stateTag:         tTag,
    19		stateAttrName:    tAttrName,
    20		stateAfterName:   tAfterName,
    21		stateBeforeValue: tBeforeValue,
    22		stateHTMLCmt:     tHTMLCmt,
    23		stateRCDATA:      tSpecialTagEnd,
    24		stateAttr:        tAttr,
    25		stateURL:         tURL,
    26		stateJS:          tJS,
    27		stateJSDqStr:     tJSDelimited,
    28		stateJSSqStr:     tJSDelimited,
    29		stateJSRegexp:    tJSDelimited,
    30		stateJSBlockCmt:  tBlockCmt,
    31		stateJSLineCmt:   tLineCmt,
    32		stateCSS:         tCSS,
    33		stateCSSDqStr:    tCSSStr,
    34		stateCSSSqStr:    tCSSStr,
    35		stateCSSDqURL:    tCSSStr,
    36		stateCSSSqURL:    tCSSStr,
    37		stateCSSURL:      tCSSStr,
    38		stateCSSBlockCmt: tBlockCmt,
    39		stateCSSLineCmt:  tLineCmt,
    40		stateError:       tError,
    41	}
    42	
    43	var commentStart = []byte("<!--")
    44	var commentEnd = []byte("-->")
    45	
    46	// tText is the context transition function for the text state.
    47	func tText(c context, s []byte) (context, int) {
    48		k := 0
    49		for {
    50			i := k + bytes.IndexByte(s[k:], '<')
    51			if i < k || i+1 == len(s) {
    52				return c, len(s)
    53			} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    54				return context{state: stateHTMLCmt}, i + 4
    55			}
    56			i++
    57			end := false
    58			if s[i] == '/' {
    59				if i+1 == len(s) {
    60					return c, len(s)
    61				}
    62				end, i = true, i+1
    63			}
    64			j, e := eatTagName(s, i)
    65			if j != i {
    66				if end {
    67					e = elementNone
    68				}
    69				// We've found an HTML tag.
    70				return context{state: stateTag, element: e}, j
    71			}
    72			k = j
    73		}
    74	}
    75	
    76	var elementContentType = [...]state{
    77		elementNone:     stateText,
    78		elementScript:   stateJS,
    79		elementStyle:    stateCSS,
    80		elementTextarea: stateRCDATA,
    81		elementTitle:    stateRCDATA,
    82	}
    83	
    84	// tTag is the context transition function for the tag state.
    85	func tTag(c context, s []byte) (context, int) {
    86		// Find the attribute name.
    87		i := eatWhiteSpace(s, 0)
    88		if i == len(s) {
    89			return c, len(s)
    90		}
    91		if s[i] == '>' {
    92			return context{
    93				state:   elementContentType[c.element],
    94				element: c.element,
    95			}, i + 1
    96		}
    97		j, err := eatAttrName(s, i)
    98		if err != nil {
    99			return context{state: stateError, err: err}, len(s)
   100		}
   101		state, attr := stateTag, attrNone
   102		if i == j {
   103			return context{
   104				state: stateError,
   105				err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   106			}, len(s)
   107		}
   108	
   109		attrName := string(s[i:j])
   110		if c.element == elementScript && attrName == "type" {
   111			attr = attrScriptType
   112		} else {
   113			switch attrType(attrName) {
   114			case contentTypeURL:
   115				attr = attrURL
   116			case contentTypeCSS:
   117				attr = attrStyle
   118			case contentTypeJS:
   119				attr = attrScript
   120			}
   121		}
   122	
   123		if j == len(s) {
   124			state = stateAttrName
   125		} else {
   126			state = stateAfterName
   127		}
   128		return context{state: state, element: c.element, attr: attr}, j
   129	}
   130	
   131	// tAttrName is the context transition function for stateAttrName.
   132	func tAttrName(c context, s []byte) (context, int) {
   133		i, err := eatAttrName(s, 0)
   134		if err != nil {
   135			return context{state: stateError, err: err}, len(s)
   136		} else if i != len(s) {
   137			c.state = stateAfterName
   138		}
   139		return c, i
   140	}
   141	
   142	// tAfterName is the context transition function for stateAfterName.
   143	func tAfterName(c context, s []byte) (context, int) {
   144		// Look for the start of the value.
   145		i := eatWhiteSpace(s, 0)
   146		if i == len(s) {
   147			return c, len(s)
   148		} else if s[i] != '=' {
   149			// Occurs due to tag ending '>', and valueless attribute.
   150			c.state = stateTag
   151			return c, i
   152		}
   153		c.state = stateBeforeValue
   154		// Consume the "=".
   155		return c, i + 1
   156	}
   157	
   158	var attrStartStates = [...]state{
   159		attrNone:       stateAttr,
   160		attrScript:     stateJS,
   161		attrScriptType: stateAttr,
   162		attrStyle:      stateCSS,
   163		attrURL:        stateURL,
   164	}
   165	
   166	// tBeforeValue is the context transition function for stateBeforeValue.
   167	func tBeforeValue(c context, s []byte) (context, int) {
   168		i := eatWhiteSpace(s, 0)
   169		if i == len(s) {
   170			return c, len(s)
   171		}
   172		// Find the attribute delimiter.
   173		delim := delimSpaceOrTagEnd
   174		switch s[i] {
   175		case '\'':
   176			delim, i = delimSingleQuote, i+1
   177		case '"':
   178			delim, i = delimDoubleQuote, i+1
   179		}
   180		c.state, c.delim = attrStartStates[c.attr], delim
   181		return c, i
   182	}
   183	
   184	// tHTMLCmt is the context transition function for stateHTMLCmt.
   185	func tHTMLCmt(c context, s []byte) (context, int) {
   186		if i := bytes.Index(s, commentEnd); i != -1 {
   187			return context{}, i + 3
   188		}
   189		return c, len(s)
   190	}
   191	
   192	// specialTagEndMarkers maps element types to the character sequence that
   193	// case-insensitively signals the end of the special tag body.
   194	var specialTagEndMarkers = [...][]byte{
   195		elementScript:   []byte("script"),
   196		elementStyle:    []byte("style"),
   197		elementTextarea: []byte("textarea"),
   198		elementTitle:    []byte("title"),
   199	}
   200	
   201	var (
   202		specialTagEndPrefix = []byte("</")
   203		tagEndSeparators    = []byte("> \t\n\f/")
   204	)
   205	
   206	// tSpecialTagEnd is the context transition function for raw text and RCDATA
   207	// element states.
   208	func tSpecialTagEnd(c context, s []byte) (context, int) {
   209		if c.element != elementNone {
   210			if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   211				return context{}, i
   212			}
   213		}
   214		return c, len(s)
   215	}
   216	
   217	// indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   218	func indexTagEnd(s []byte, tag []byte) int {
   219		res := 0
   220		plen := len(specialTagEndPrefix)
   221		for len(s) > 0 {
   222			// Try to find the tag end prefix first
   223			i := bytes.Index(s, specialTagEndPrefix)
   224			if i == -1 {
   225				return i
   226			}
   227			s = s[i+plen:]
   228			// Try to match the actual tag if there is still space for it
   229			if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   230				s = s[len(tag):]
   231				// Check the tag is followed by a proper separator
   232				if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   233					return res + i
   234				}
   235				res += len(tag)
   236			}
   237			res += i + plen
   238		}
   239		return -1
   240	}
   241	
   242	// tAttr is the context transition function for the attribute state.
   243	func tAttr(c context, s []byte) (context, int) {
   244		return c, len(s)
   245	}
   246	
   247	// tURL is the context transition function for the URL state.
   248	func tURL(c context, s []byte) (context, int) {
   249		if bytes.IndexAny(s, "#?") >= 0 {
   250			c.urlPart = urlPartQueryOrFrag
   251		} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   252			// HTML5 uses "Valid URL potentially surrounded by spaces" for
   253			// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
   254			c.urlPart = urlPartPreQuery
   255		}
   256		return c, len(s)
   257	}
   258	
   259	// tJS is the context transition function for the JS state.
   260	func tJS(c context, s []byte) (context, int) {
   261		i := bytes.IndexAny(s, `"'/`)
   262		if i == -1 {
   263			// Entire input is non string, comment, regexp tokens.
   264			c.jsCtx = nextJSCtx(s, c.jsCtx)
   265			return c, len(s)
   266		}
   267		c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   268		switch s[i] {
   269		case '"':
   270			c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   271		case '\'':
   272			c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   273		case '/':
   274			switch {
   275			case i+1 < len(s) && s[i+1] == '/':
   276				c.state, i = stateJSLineCmt, i+1
   277			case i+1 < len(s) && s[i+1] == '*':
   278				c.state, i = stateJSBlockCmt, i+1
   279			case c.jsCtx == jsCtxRegexp:
   280				c.state = stateJSRegexp
   281			case c.jsCtx == jsCtxDivOp:
   282				c.jsCtx = jsCtxRegexp
   283			default:
   284				return context{
   285					state: stateError,
   286					err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   287				}, len(s)
   288			}
   289		default:
   290			panic("unreachable")
   291		}
   292		return c, i + 1
   293	}
   294	
   295	// tJSDelimited is the context transition function for the JS string and regexp
   296	// states.
   297	func tJSDelimited(c context, s []byte) (context, int) {
   298		specials := `\"`
   299		switch c.state {
   300		case stateJSSqStr:
   301			specials = `\'`
   302		case stateJSRegexp:
   303			specials = `\/[]`
   304		}
   305	
   306		k, inCharset := 0, false
   307		for {
   308			i := k + bytes.IndexAny(s[k:], specials)
   309			if i < k {
   310				break
   311			}
   312			switch s[i] {
   313			case '\\':
   314				i++
   315				if i == len(s) {
   316					return context{
   317						state: stateError,
   318						err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   319					}, len(s)
   320				}
   321			case '[':
   322				inCharset = true
   323			case ']':
   324				inCharset = false
   325			default:
   326				// end delimiter
   327				if !inCharset {
   328					c.state, c.jsCtx = stateJS, jsCtxDivOp
   329					return c, i + 1
   330				}
   331			}
   332			k = i + 1
   333		}
   334	
   335		if inCharset {
   336			// This can be fixed by making context richer if interpolation
   337			// into charsets is desired.
   338			return context{
   339				state: stateError,
   340				err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
   341			}, len(s)
   342		}
   343	
   344		return c, len(s)
   345	}
   346	
   347	var blockCommentEnd = []byte("*/")
   348	
   349	// tBlockCmt is the context transition function for /*comment*/ states.
   350	func tBlockCmt(c context, s []byte) (context, int) {
   351		i := bytes.Index(s, blockCommentEnd)
   352		if i == -1 {
   353			return c, len(s)
   354		}
   355		switch c.state {
   356		case stateJSBlockCmt:
   357			c.state = stateJS
   358		case stateCSSBlockCmt:
   359			c.state = stateCSS
   360		default:
   361			panic(c.state.String())
   362		}
   363		return c, i + 2
   364	}
   365	
   366	// tLineCmt is the context transition function for //comment states.
   367	func tLineCmt(c context, s []byte) (context, int) {
   368		var lineTerminators string
   369		var endState state
   370		switch c.state {
   371		case stateJSLineCmt:
   372			lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   373		case stateCSSLineCmt:
   374			lineTerminators, endState = "\n\f\r", stateCSS
   375			// Line comments are not part of any published CSS standard but
   376			// are supported by the 4 major browsers.
   377			// This defines line comments as
   378			//     LINECOMMENT ::= "//" [^\n\f\d]*
   379			// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   380			// newlines:
   381			//     nl ::= #xA | #xD #xA | #xD | #xC
   382		default:
   383			panic(c.state.String())
   384		}
   385	
   386		i := bytes.IndexAny(s, lineTerminators)
   387		if i == -1 {
   388			return c, len(s)
   389		}
   390		c.state = endState
   391		// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
   392		// "However, the LineTerminator at the end of the line is not
   393		// considered to be part of the single-line comment; it is
   394		// recognized separately by the lexical grammar and becomes part
   395		// of the stream of input elements for the syntactic grammar."
   396		return c, i
   397	}
   398	
   399	// tCSS is the context transition function for the CSS state.
   400	func tCSS(c context, s []byte) (context, int) {
   401		// CSS quoted strings are almost never used except for:
   402		// (1) URLs as in background: "/foo.png"
   403		// (2) Multiword font-names as in font-family: "Times New Roman"
   404		// (3) List separators in content values as in inline-lists:
   405		//    <style>
   406		//    ul.inlineList { list-style: none; padding:0 }
   407		//    ul.inlineList > li { display: inline }
   408		//    ul.inlineList > li:before { content: ", " }
   409		//    ul.inlineList > li:first-child:before { content: "" }
   410		//    </style>
   411		//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   412		// (4) Attribute value selectors as in a[href="http://example.com/"]
   413		//
   414		// We conservatively treat all strings as URLs, but make some
   415		// allowances to avoid confusion.
   416		//
   417		// In (1), our conservative assumption is justified.
   418		// In (2), valid font names do not contain ':', '?', or '#', so our
   419		// conservative assumption is fine since we will never transition past
   420		// urlPartPreQuery.
   421		// In (3), our protocol heuristic should not be tripped, and there
   422		// should not be non-space content after a '?' or '#', so as long as
   423		// we only %-encode RFC 3986 reserved characters we are ok.
   424		// In (4), we should URL escape for URL attributes, and for others we
   425		// have the attribute name available if our conservative assumption
   426		// proves problematic for real code.
   427	
   428		k := 0
   429		for {
   430			i := k + bytes.IndexAny(s[k:], `("'/`)
   431			if i < k {
   432				return c, len(s)
   433			}
   434			switch s[i] {
   435			case '(':
   436				// Look for url to the left.
   437				p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   438				if endsWithCSSKeyword(p, "url") {
   439					j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   440					switch {
   441					case j != len(s) && s[j] == '"':
   442						c.state, j = stateCSSDqURL, j+1
   443					case j != len(s) && s[j] == '\'':
   444						c.state, j = stateCSSSqURL, j+1
   445					default:
   446						c.state = stateCSSURL
   447					}
   448					return c, j
   449				}
   450			case '/':
   451				if i+1 < len(s) {
   452					switch s[i+1] {
   453					case '/':
   454						c.state = stateCSSLineCmt
   455						return c, i + 2
   456					case '*':
   457						c.state = stateCSSBlockCmt
   458						return c, i + 2
   459					}
   460				}
   461			case '"':
   462				c.state = stateCSSDqStr
   463				return c, i + 1
   464			case '\'':
   465				c.state = stateCSSSqStr
   466				return c, i + 1
   467			}
   468			k = i + 1
   469		}
   470	}
   471	
   472	// tCSSStr is the context transition function for the CSS string and URL states.
   473	func tCSSStr(c context, s []byte) (context, int) {
   474		var endAndEsc string
   475		switch c.state {
   476		case stateCSSDqStr, stateCSSDqURL:
   477			endAndEsc = `\"`
   478		case stateCSSSqStr, stateCSSSqURL:
   479			endAndEsc = `\'`
   480		case stateCSSURL:
   481			// Unquoted URLs end with a newline or close parenthesis.
   482			// The below includes the wc (whitespace character) and nl.
   483			endAndEsc = "\\\t\n\f\r )"
   484		default:
   485			panic(c.state.String())
   486		}
   487	
   488		k := 0
   489		for {
   490			i := k + bytes.IndexAny(s[k:], endAndEsc)
   491			if i < k {
   492				c, nread := tURL(c, decodeCSS(s[k:]))
   493				return c, k + nread
   494			}
   495			if s[i] == '\\' {
   496				i++
   497				if i == len(s) {
   498					return context{
   499						state: stateError,
   500						err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
   501					}, len(s)
   502				}
   503			} else {
   504				c.state = stateCSS
   505				return c, i + 1
   506			}
   507			c, _ = tURL(c, decodeCSS(s[:i+1]))
   508			k = i + 1
   509		}
   510	}
   511	
   512	// tError is the context transition function for the error state.
   513	func tError(c context, s []byte) (context, int) {
   514		return c, len(s)
   515	}
   516	
   517	// eatAttrName returns the largest j such that s[i:j] is an attribute name.
   518	// It returns an error if s[i:] does not look like it begins with an
   519	// attribute name, such as encountering a quote mark without a preceding
   520	// equals sign.
   521	func eatAttrName(s []byte, i int) (int, *Error) {
   522		for j := i; j < len(s); j++ {
   523			switch s[j] {
   524			case ' ', '\t', '\n', '\f', '\r', '=', '>':
   525				return j, nil
   526			case '\'', '"', '<':
   527				// These result in a parse warning in HTML5 and are
   528				// indicative of serious problems if seen in an attr
   529				// name in a template.
   530				return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   531			default:
   532				// No-op.
   533			}
   534		}
   535		return len(s), nil
   536	}
   537	
   538	var elementNameMap = map[string]element{
   539		"script":   elementScript,
   540		"style":    elementStyle,
   541		"textarea": elementTextarea,
   542		"title":    elementTitle,
   543	}
   544	
   545	// asciiAlpha reports whether c is an ASCII letter.
   546	func asciiAlpha(c byte) bool {
   547		return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   548	}
   549	
   550	// asciiAlphaNum reports whether c is an ASCII letter or digit.
   551	func asciiAlphaNum(c byte) bool {
   552		return asciiAlpha(c) || '0' <= c && c <= '9'
   553	}
   554	
   555	// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   556	func eatTagName(s []byte, i int) (int, element) {
   557		if i == len(s) || !asciiAlpha(s[i]) {
   558			return i, elementNone
   559		}
   560		j := i + 1
   561		for j < len(s) {
   562			x := s[j]
   563			if asciiAlphaNum(x) {
   564				j++
   565				continue
   566			}
   567			// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   568			if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   569				j += 2
   570				continue
   571			}
   572			break
   573		}
   574		return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   575	}
   576	
   577	// eatWhiteSpace returns the largest j such that s[i:j] is white space.
   578	func eatWhiteSpace(s []byte, i int) int {
   579		for j := i; j < len(s); j++ {
   580			switch s[j] {
   581			case ' ', '\t', '\n', '\f', '\r':
   582				// No-op.
   583			default:
   584				return j
   585			}
   586		}
   587		return len(s)
   588	}
   589	

View as plain text