transition.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"strings"
    10  )
    11  
// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is keyed by state constant. Several states share a single
// implementation (for example tJSDelimited, tLineCmt, tBlockCmt and tCSSStr);
// those functions inspect c.state themselves to tell the states apart.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:           tText,
	stateTag:            tTag,
	stateAttrName:       tAttrName,
	stateAfterName:      tAfterName,
	stateBeforeValue:    tBeforeValue,
	stateHTMLCmt:        tHTMLCmt,
	stateRCDATA:         tSpecialTagEnd,
	stateAttr:           tAttr,
	stateURL:            tURL,
	stateSrcset:         tURL,
	stateJS:             tJS,
	stateJSDqStr:        tJSDelimited,
	stateJSSqStr:        tJSDelimited,
	stateJSRegexp:       tJSDelimited,
	stateJSTmplLit:      tJSTmpl,
	stateJSBlockCmt:     tBlockCmt,
	stateJSLineCmt:      tLineCmt,
	stateJSHTMLOpenCmt:  tLineCmt,
	stateJSHTMLCloseCmt: tLineCmt,
	stateCSS:            tCSS,
	stateCSSDqStr:       tCSSStr,
	stateCSSSqStr:       tCSSStr,
	stateCSSDqURL:       tCSSStr,
	stateCSSSqURL:       tCSSStr,
	stateCSSURL:         tCSSStr,
	stateCSSBlockCmt:    tBlockCmt,
	stateCSSLineCmt:     tLineCmt,
	stateError:          tError,
}
    46  
    47  var commentStart = []byte("<!--")
    48  var commentEnd = []byte("-->")
    49  
    50  // tText is the context transition function for the text state.
    51  func tText(c context, s []byte) (context, int) {
    52  	k := 0
    53  	for {
    54  		i := k + bytes.IndexByte(s[k:], '<')
    55  		if i < k || i+1 == len(s) {
    56  			return c, len(s)
    57  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    58  			return context{state: stateHTMLCmt}, i + 4
    59  		}
    60  		i++
    61  		end := false
    62  		if s[i] == '/' {
    63  			if i+1 == len(s) {
    64  				return c, len(s)
    65  			}
    66  			end, i = true, i+1
    67  		}
    68  		j, e := eatTagName(s, i)
    69  		if j != i {
    70  			if end {
    71  				e = elementNone
    72  			}
    73  			// We've found an HTML tag.
    74  			return context{state: stateTag, element: e}, j
    75  		}
    76  		k = j
    77  	}
    78  }
    79  
// elementContentType maps an element type to the state that tTag enters once
// the tag is closed with '>', i.e. the state used for the element's body.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
    87  
    88  // tTag is the context transition function for the tag state.
    89  func tTag(c context, s []byte) (context, int) {
    90  	// Find the attribute name.
    91  	i := eatWhiteSpace(s, 0)
    92  	if i == len(s) {
    93  		return c, len(s)
    94  	}
    95  	if s[i] == '>' {
    96  		return context{
    97  			state:   elementContentType[c.element],
    98  			element: c.element,
    99  		}, i + 1
   100  	}
   101  	j, err := eatAttrName(s, i)
   102  	if err != nil {
   103  		return context{state: stateError, err: err}, len(s)
   104  	}
   105  	state, attr := stateTag, attrNone
   106  	if i == j {
   107  		return context{
   108  			state: stateError,
   109  			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   110  		}, len(s)
   111  	}
   112  
   113  	attrName := strings.ToLower(string(s[i:j]))
   114  	if c.element == elementScript && attrName == "type" {
   115  		attr = attrScriptType
   116  	} else {
   117  		switch attrType(attrName) {
   118  		case contentTypeURL:
   119  			attr = attrURL
   120  		case contentTypeCSS:
   121  			attr = attrStyle
   122  		case contentTypeJS:
   123  			attr = attrScript
   124  		case contentTypeSrcset:
   125  			attr = attrSrcset
   126  		}
   127  	}
   128  
   129  	if j == len(s) {
   130  		state = stateAttrName
   131  	} else {
   132  		state = stateAfterName
   133  	}
   134  	return context{state: state, element: c.element, attr: attr}, j
   135  }
   136  
   137  // tAttrName is the context transition function for stateAttrName.
   138  func tAttrName(c context, s []byte) (context, int) {
   139  	i, err := eatAttrName(s, 0)
   140  	if err != nil {
   141  		return context{state: stateError, err: err}, len(s)
   142  	} else if i != len(s) {
   143  		c.state = stateAfterName
   144  	}
   145  	return c, i
   146  }
   147  
   148  // tAfterName is the context transition function for stateAfterName.
   149  func tAfterName(c context, s []byte) (context, int) {
   150  	// Look for the start of the value.
   151  	i := eatWhiteSpace(s, 0)
   152  	if i == len(s) {
   153  		return c, len(s)
   154  	} else if s[i] != '=' {
   155  		// Occurs due to tag ending '>', and valueless attribute.
   156  		c.state = stateTag
   157  		return c, i
   158  	}
   159  	c.state = stateBeforeValue
   160  	// Consume the "=".
   161  	return c, i + 1
   162  }
   163  
// attrStartStates maps an attribute kind to the state that tBeforeValue
// enters when the attribute's value begins.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
	attrSrcset:     stateSrcset,
}
   172  
   173  // tBeforeValue is the context transition function for stateBeforeValue.
   174  func tBeforeValue(c context, s []byte) (context, int) {
   175  	i := eatWhiteSpace(s, 0)
   176  	if i == len(s) {
   177  		return c, len(s)
   178  	}
   179  	// Find the attribute delimiter.
   180  	delim := delimSpaceOrTagEnd
   181  	switch s[i] {
   182  	case '\'':
   183  		delim, i = delimSingleQuote, i+1
   184  	case '"':
   185  		delim, i = delimDoubleQuote, i+1
   186  	}
   187  	c.state, c.delim = attrStartStates[c.attr], delim
   188  	return c, i
   189  }
   190  
   191  // tHTMLCmt is the context transition function for stateHTMLCmt.
   192  func tHTMLCmt(c context, s []byte) (context, int) {
   193  	if i := bytes.Index(s, commentEnd); i != -1 {
   194  		return context{}, i + 3
   195  	}
   196  	return c, len(s)
   197  }
   198  
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
//
// Each entry is the bare element name; tSpecialTagEnd hands it to
// indexTagEnd, which looks for it immediately after specialTagEndPrefix.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
   207  
   208  var (
   209  	specialTagEndPrefix = []byte("</")
   210  	tagEndSeparators    = []byte("> \t\n\f/")
   211  )
   212  
   213  // tSpecialTagEnd is the context transition function for raw text and RCDATA
   214  // element states.
   215  func tSpecialTagEnd(c context, s []byte) (context, int) {
   216  	if c.element != elementNone {
   217  		// script end tags ("</script") within script literals are ignored, so that
   218  		// we can properly escape them.
   219  		if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
   220  			return c, len(s)
   221  		}
   222  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   223  			return context{}, i
   224  		}
   225  	}
   226  	return c, len(s)
   227  }
   228  
   229  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   230  func indexTagEnd(s []byte, tag []byte) int {
   231  	res := 0
   232  	plen := len(specialTagEndPrefix)
   233  	for len(s) > 0 {
   234  		// Try to find the tag end prefix first
   235  		i := bytes.Index(s, specialTagEndPrefix)
   236  		if i == -1 {
   237  			return i
   238  		}
   239  		s = s[i+plen:]
   240  		// Try to match the actual tag if there is still space for it
   241  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   242  			s = s[len(tag):]
   243  			// Check the tag is followed by a proper separator
   244  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   245  				return res + i
   246  			}
   247  			res += len(tag)
   248  		}
   249  		res += i + plen
   250  	}
   251  	return -1
   252  }
   253  
   254  // tAttr is the context transition function for the attribute state.
   255  func tAttr(c context, s []byte) (context, int) {
   256  	return c, len(s)
   257  }
   258  
   259  // tURL is the context transition function for the URL state.
   260  func tURL(c context, s []byte) (context, int) {
   261  	if bytes.ContainsAny(s, "#?") {
   262  		c.urlPart = urlPartQueryOrFrag
   263  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   264  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
   265  		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
   266  		c.urlPart = urlPartPreQuery
   267  	}
   268  	return c, len(s)
   269  }
   270  
   271  // tJS is the context transition function for the JS state.
   272  func tJS(c context, s []byte) (context, int) {
   273  	i := bytes.IndexAny(s, "\"`'/{}<-#")
   274  	if i == -1 {
   275  		// Entire input is non string, comment, regexp tokens.
   276  		c.jsCtx = nextJSCtx(s, c.jsCtx)
   277  		return c, len(s)
   278  	}
   279  	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   280  	switch s[i] {
   281  	case '"':
   282  		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   283  	case '\'':
   284  		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   285  	case '`':
   286  		c.state, c.jsCtx = stateJSTmplLit, jsCtxRegexp
   287  	case '/':
   288  		switch {
   289  		case i+1 < len(s) && s[i+1] == '/':
   290  			c.state, i = stateJSLineCmt, i+1
   291  		case i+1 < len(s) && s[i+1] == '*':
   292  			c.state, i = stateJSBlockCmt, i+1
   293  		case c.jsCtx == jsCtxRegexp:
   294  			c.state = stateJSRegexp
   295  		case c.jsCtx == jsCtxDivOp:
   296  			c.jsCtx = jsCtxRegexp
   297  		default:
   298  			return context{
   299  				state: stateError,
   300  				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   301  			}, len(s)
   302  		}
   303  	// ECMAScript supports HTML style comments for legacy reasons, see Appendix
   304  	// B.1.1 "HTML-like Comments". The handling of these comments is somewhat
   305  	// confusing. Multi-line comments are not supported, i.e. anything on lines
   306  	// between the opening and closing tokens is not considered a comment, but
   307  	// anything following the opening or closing token, on the same line, is
   308  	// ignored. As such we simply treat any line prefixed with "<!--" or "-->"
   309  	// as if it were actually prefixed with "//" and move on.
   310  	case '<':
   311  		if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
   312  			c.state, i = stateJSHTMLOpenCmt, i+3
   313  		}
   314  	case '-':
   315  		if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
   316  			c.state, i = stateJSHTMLCloseCmt, i+2
   317  		}
   318  	// ECMAScript also supports "hashbang" comment lines, see Section 12.5.
   319  	case '#':
   320  		if i+1 < len(s) && s[i+1] == '!' {
   321  			c.state, i = stateJSLineCmt, i+1
   322  		}
   323  	case '{':
   324  		// We only care about tracking brace depth if we are inside of a
   325  		// template literal.
   326  		if len(c.jsBraceDepth) == 0 {
   327  			return c, i + 1
   328  		}
   329  		c.jsBraceDepth[len(c.jsBraceDepth)-1]++
   330  	case '}':
   331  		if len(c.jsBraceDepth) == 0 {
   332  			return c, i + 1
   333  		}
   334  		// There are no cases where a brace can be escaped in the JS context
   335  		// that are not syntax errors, it seems. Because of this we can just
   336  		// count "\}" as "}" and move on, the script is already broken as
   337  		// fully fledged parsers will just fail anyway.
   338  		c.jsBraceDepth[len(c.jsBraceDepth)-1]--
   339  		if c.jsBraceDepth[len(c.jsBraceDepth)-1] >= 0 {
   340  			return c, i + 1
   341  		}
   342  		c.jsBraceDepth = c.jsBraceDepth[:len(c.jsBraceDepth)-1]
   343  		c.state = stateJSTmplLit
   344  	default:
   345  		panic("unreachable")
   346  	}
   347  	return c, i + 1
   348  }
   349  
   350  func tJSTmpl(c context, s []byte) (context, int) {
   351  	var k int
   352  	for {
   353  		i := k + bytes.IndexAny(s[k:], "`\\$")
   354  		if i < k {
   355  			break
   356  		}
   357  		switch s[i] {
   358  		case '\\':
   359  			i++
   360  			if i == len(s) {
   361  				return context{
   362  					state: stateError,
   363  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   364  				}, len(s)
   365  			}
   366  		case '$':
   367  			if len(s) >= i+2 && s[i+1] == '{' {
   368  				c.jsBraceDepth = append(c.jsBraceDepth, 0)
   369  				c.state = stateJS
   370  				return c, i + 2
   371  			}
   372  		case '`':
   373  			// end
   374  			c.state = stateJS
   375  			return c, i + 1
   376  		}
   377  		k = i + 1
   378  	}
   379  
   380  	return c, len(s)
   381  }
   382  
   383  // tJSDelimited is the context transition function for the JS string and regexp
   384  // states.
   385  func tJSDelimited(c context, s []byte) (context, int) {
   386  	specials := `\"`
   387  	switch c.state {
   388  	case stateJSSqStr:
   389  		specials = `\'`
   390  	case stateJSRegexp:
   391  		specials = `\/[]`
   392  	}
   393  
   394  	k, inCharset := 0, false
   395  	for {
   396  		i := k + bytes.IndexAny(s[k:], specials)
   397  		if i < k {
   398  			break
   399  		}
   400  		switch s[i] {
   401  		case '\\':
   402  			i++
   403  			if i == len(s) {
   404  				return context{
   405  					state: stateError,
   406  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   407  				}, len(s)
   408  			}
   409  		case '[':
   410  			inCharset = true
   411  		case ']':
   412  			inCharset = false
   413  		case '/':
   414  			// If "</script" appears in a regex literal, the '/' should not
   415  			// close the regex literal, and it will later be escaped to
   416  			// "\x3C/script" in escapeText.
   417  			if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
   418  				i++
   419  			} else if !inCharset {
   420  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   421  				return c, i + 1
   422  			}
   423  		default:
   424  			// end delimiter
   425  			if !inCharset {
   426  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   427  				return c, i + 1
   428  			}
   429  		}
   430  		k = i + 1
   431  	}
   432  
   433  	if inCharset {
   434  		// This can be fixed by making context richer if interpolation
   435  		// into charsets is desired.
   436  		return context{
   437  			state: stateError,
   438  			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
   439  		}, len(s)
   440  	}
   441  
   442  	return c, len(s)
   443  }
   444  
// blockCommentEnd is the terminator tBlockCmt searches for to end a
// /*comment*/ in both JS and CSS.
var blockCommentEnd = []byte("*/")
   446  
   447  // tBlockCmt is the context transition function for /*comment*/ states.
   448  func tBlockCmt(c context, s []byte) (context, int) {
   449  	i := bytes.Index(s, blockCommentEnd)
   450  	if i == -1 {
   451  		return c, len(s)
   452  	}
   453  	switch c.state {
   454  	case stateJSBlockCmt:
   455  		c.state = stateJS
   456  	case stateCSSBlockCmt:
   457  		c.state = stateCSS
   458  	default:
   459  		panic(c.state.String())
   460  	}
   461  	return c, i + 2
   462  }
   463  
   464  // tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state.
   465  func tLineCmt(c context, s []byte) (context, int) {
   466  	var lineTerminators string
   467  	var endState state
   468  	switch c.state {
   469  	case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
   470  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   471  	case stateCSSLineCmt:
   472  		lineTerminators, endState = "\n\f\r", stateCSS
   473  		// Line comments are not part of any published CSS standard but
   474  		// are supported by the 4 major browsers.
   475  		// This defines line comments as
   476  		//     LINECOMMENT ::= "//" [^\n\f\d]*
   477  		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   478  		// newlines:
   479  		//     nl ::= #xA | #xD #xA | #xD | #xC
   480  	default:
   481  		panic(c.state.String())
   482  	}
   483  
   484  	i := bytes.IndexAny(s, lineTerminators)
   485  	if i == -1 {
   486  		return c, len(s)
   487  	}
   488  	c.state = endState
   489  	// Per section 7.4 of EcmaScript 5 : https://es5.github.io/#x7.4
   490  	// "However, the LineTerminator at the end of the line is not
   491  	// considered to be part of the single-line comment; it is
   492  	// recognized separately by the lexical grammar and becomes part
   493  	// of the stream of input elements for the syntactic grammar."
   494  	return c, i
   495  }
   496  
   497  // tCSS is the context transition function for the CSS state.
   498  func tCSS(c context, s []byte) (context, int) {
   499  	// CSS quoted strings are almost never used except for:
   500  	// (1) URLs as in background: "/foo.png"
   501  	// (2) Multiword font-names as in font-family: "Times New Roman"
   502  	// (3) List separators in content values as in inline-lists:
   503  	//    <style>
   504  	//    ul.inlineList { list-style: none; padding:0 }
   505  	//    ul.inlineList > li { display: inline }
   506  	//    ul.inlineList > li:before { content: ", " }
   507  	//    ul.inlineList > li:first-child:before { content: "" }
   508  	//    </style>
   509  	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   510  	// (4) Attribute value selectors as in a[href="http://example.com/"]
   511  	//
   512  	// We conservatively treat all strings as URLs, but make some
   513  	// allowances to avoid confusion.
   514  	//
   515  	// In (1), our conservative assumption is justified.
   516  	// In (2), valid font names do not contain ':', '?', or '#', so our
   517  	// conservative assumption is fine since we will never transition past
   518  	// urlPartPreQuery.
   519  	// In (3), our protocol heuristic should not be tripped, and there
   520  	// should not be non-space content after a '?' or '#', so as long as
   521  	// we only %-encode RFC 3986 reserved characters we are ok.
   522  	// In (4), we should URL escape for URL attributes, and for others we
   523  	// have the attribute name available if our conservative assumption
   524  	// proves problematic for real code.
   525  
   526  	k := 0
   527  	for {
   528  		i := k + bytes.IndexAny(s[k:], `("'/`)
   529  		if i < k {
   530  			return c, len(s)
   531  		}
   532  		switch s[i] {
   533  		case '(':
   534  			// Look for url to the left.
   535  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   536  			if endsWithCSSKeyword(p, "url") {
   537  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   538  				switch {
   539  				case j != len(s) && s[j] == '"':
   540  					c.state, j = stateCSSDqURL, j+1
   541  				case j != len(s) && s[j] == '\'':
   542  					c.state, j = stateCSSSqURL, j+1
   543  				default:
   544  					c.state = stateCSSURL
   545  				}
   546  				return c, j
   547  			}
   548  		case '/':
   549  			if i+1 < len(s) {
   550  				switch s[i+1] {
   551  				case '/':
   552  					c.state = stateCSSLineCmt
   553  					return c, i + 2
   554  				case '*':
   555  					c.state = stateCSSBlockCmt
   556  					return c, i + 2
   557  				}
   558  			}
   559  		case '"':
   560  			c.state = stateCSSDqStr
   561  			return c, i + 1
   562  		case '\'':
   563  			c.state = stateCSSSqStr
   564  			return c, i + 1
   565  		}
   566  		k = i + 1
   567  	}
   568  }
   569  
   570  // tCSSStr is the context transition function for the CSS string and URL states.
   571  func tCSSStr(c context, s []byte) (context, int) {
   572  	var endAndEsc string
   573  	switch c.state {
   574  	case stateCSSDqStr, stateCSSDqURL:
   575  		endAndEsc = `\"`
   576  	case stateCSSSqStr, stateCSSSqURL:
   577  		endAndEsc = `\'`
   578  	case stateCSSURL:
   579  		// Unquoted URLs end with a newline or close parenthesis.
   580  		// The below includes the wc (whitespace character) and nl.
   581  		endAndEsc = "\\\t\n\f\r )"
   582  	default:
   583  		panic(c.state.String())
   584  	}
   585  
   586  	k := 0
   587  	for {
   588  		i := k + bytes.IndexAny(s[k:], endAndEsc)
   589  		if i < k {
   590  			c, nread := tURL(c, decodeCSS(s[k:]))
   591  			return c, k + nread
   592  		}
   593  		if s[i] == '\\' {
   594  			i++
   595  			if i == len(s) {
   596  				return context{
   597  					state: stateError,
   598  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
   599  				}, len(s)
   600  			}
   601  		} else {
   602  			c.state = stateCSS
   603  			return c, i + 1
   604  		}
   605  		c, _ = tURL(c, decodeCSS(s[:i+1]))
   606  		k = i + 1
   607  	}
   608  }
   609  
   610  // tError is the context transition function for the error state.
   611  func tError(c context, s []byte) (context, int) {
   612  	return c, len(s)
   613  }
   614  
   615  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
   616  // It returns an error if s[i:] does not look like it begins with an
   617  // attribute name, such as encountering a quote mark without a preceding
   618  // equals sign.
   619  func eatAttrName(s []byte, i int) (int, *Error) {
   620  	for j := i; j < len(s); j++ {
   621  		switch s[j] {
   622  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
   623  			return j, nil
   624  		case '\'', '"', '<':
   625  			// These result in a parse warning in HTML5 and are
   626  			// indicative of serious problems if seen in an attr
   627  			// name in a template.
   628  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   629  		default:
   630  			// No-op.
   631  		}
   632  	}
   633  	return len(s), nil
   634  }
   635  
// elementNameMap maps lower-case tag names to the element types that get
// special handling. eatTagName lower-cases a tag name before looking it up
// here; names that are absent yield the zero element value (presumably
// elementNone; confirm against the element declaration).
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
   642  
   643  // asciiAlpha reports whether c is an ASCII letter.
   644  func asciiAlpha(c byte) bool {
   645  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   646  }
   647  
   648  // asciiAlphaNum reports whether c is an ASCII letter or digit.
   649  func asciiAlphaNum(c byte) bool {
   650  	return asciiAlpha(c) || '0' <= c && c <= '9'
   651  }
   652  
   653  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   654  func eatTagName(s []byte, i int) (int, element) {
   655  	if i == len(s) || !asciiAlpha(s[i]) {
   656  		return i, elementNone
   657  	}
   658  	j := i + 1
   659  	for j < len(s) {
   660  		x := s[j]
   661  		if asciiAlphaNum(x) {
   662  			j++
   663  			continue
   664  		}
   665  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   666  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   667  			j += 2
   668  			continue
   669  		}
   670  		break
   671  	}
   672  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   673  }
   674  
   675  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
   676  func eatWhiteSpace(s []byte, i int) int {
   677  	for j := i; j < len(s); j++ {
   678  		switch s[j] {
   679  		case ' ', '\t', '\n', '\f', '\r':
   680  			// No-op.
   681  		default:
   682  			return j
   683  		}
   684  	}
   685  	return len(s)
   686  }
   687
View as plain text