css.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strings"
    11  	"unicode"
    12  	"unicode/utf8"
    13  )
    14  
    15  // endsWithCSSKeyword reports whether b ends with an ident that
    16  // case-insensitively matches the lower-case kw.
    17  func endsWithCSSKeyword(b []byte, kw string) bool {
    18  	i := len(b) - len(kw)
    19  	if i < 0 {
    20  		// Too short.
    21  		return false
    22  	}
    23  	if i != 0 {
    24  		r, _ := utf8.DecodeLastRune(b[:i])
    25  		if isCSSNmchar(r) {
    26  			// Too long.
    27  			return false
    28  		}
    29  	}
    30  	// Many CSS keywords, such as "!important" can have characters encoded,
    31  	// but the URI production does not allow that according to
    32  	// https://www.w3.org/TR/css3-syntax/#TOK-URI
    33  	// This does not attempt to recognize encoded keywords. For example,
    34  	// given "\75\72\6c" and "url" this return false.
    35  	return string(bytes.ToLower(b[i:])) == kw
    36  }
    37  
    38  // isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
    39  func isCSSNmchar(r rune) bool {
    40  	// Based on the CSS3 nmchar production but ignores multi-rune escape
    41  	// sequences.
    42  	// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
    43  	return 'a' <= r && r <= 'z' ||
    44  		'A' <= r && r <= 'Z' ||
    45  		'0' <= r && r <= '9' ||
    46  		r == '-' ||
    47  		r == '_' ||
    48  		// Non-ASCII cases below.
    49  		0x80 <= r && r <= 0xd7ff ||
    50  		0xe000 <= r && r <= 0xfffd ||
    51  		0x10000 <= r && r <= 0x10ffff
    52  }
    53  
    54  // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
    55  // If there is no change, it returns the input, otherwise it returns a slice
    56  // backed by a new array.
    57  // https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
    58  func decodeCSS(s []byte) []byte {
    59  	i := bytes.IndexByte(s, '\\')
    60  	if i == -1 {
    61  		return s
    62  	}
    63  	// The UTF-8 sequence for a codepoint is never longer than 1 + the
    64  	// number hex digits need to represent that codepoint, so len(s) is an
    65  	// upper bound on the output length.
    66  	b := make([]byte, 0, len(s))
    67  	for len(s) != 0 {
    68  		i := bytes.IndexByte(s, '\\')
    69  		if i == -1 {
    70  			i = len(s)
    71  		}
    72  		b, s = append(b, s[:i]...), s[i:]
    73  		if len(s) < 2 {
    74  			break
    75  		}
    76  		// https://www.w3.org/TR/css3-syntax/#SUBTOK-escape
    77  		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
    78  		if isHex(s[1]) {
    79  			// https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
    80  			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
    81  			j := 2
    82  			for j < len(s) && j < 7 && isHex(s[j]) {
    83  				j++
    84  			}
    85  			r := hexDecode(s[1:j])
    86  			if r > unicode.MaxRune {
    87  				r, j = r/16, j-1
    88  			}
    89  			n := utf8.EncodeRune(b[len(b):cap(b)], r)
    90  			// The optional space at the end allows a hex
    91  			// sequence to be followed by a literal hex.
    92  			// string(decodeCSS([]byte(`\A B`))) == "\nB"
    93  			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
    94  		} else {
    95  			// `\\` decodes to `\` and `\"` to `"`.
    96  			_, n := utf8.DecodeRune(s[1:])
    97  			b, s = append(b, s[1:1+n]...), s[1+n:]
    98  		}
    99  	}
   100  	return b
   101  }
   102  
   103  // isHex reports whether the given character is a hex digit.
   104  func isHex(c byte) bool {
   105  	return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
   106  }
   107  
   108  // hexDecode decodes a short hex digit sequence: "10" -> 16.
   109  func hexDecode(s []byte) rune {
   110  	n := '\x00'
   111  	for _, c := range s {
   112  		n <<= 4
   113  		switch {
   114  		case '0' <= c && c <= '9':
   115  			n |= rune(c - '0')
   116  		case 'a' <= c && c <= 'f':
   117  			n |= rune(c-'a') + 10
   118  		case 'A' <= c && c <= 'F':
   119  			n |= rune(c-'A') + 10
   120  		default:
   121  			panic(fmt.Sprintf("Bad hex digit in %q", s))
   122  		}
   123  	}
   124  	return n
   125  }
   126  
   127  // skipCSSSpace returns a suffix of c, skipping over a single space.
   128  func skipCSSSpace(c []byte) []byte {
   129  	if len(c) == 0 {
   130  		return c
   131  	}
   132  	// wc ::= #x9 | #xA | #xC | #xD | #x20
   133  	switch c[0] {
   134  	case '\t', '\n', '\f', ' ':
   135  		return c[1:]
   136  	case '\r':
   137  		// This differs from CSS3's wc production because it contains a
   138  		// probable spec error whereby wc contains all the single byte
   139  		// sequences in nl (newline) but not CRLF.
   140  		if len(c) >= 2 && c[1] == '\n' {
   141  			return c[2:]
   142  		}
   143  		return c[1:]
   144  	}
   145  	return c
   146  }
   147  
   148  // isCSSSpace reports whether b is a CSS space char as defined in wc.
   149  func isCSSSpace(b byte) bool {
   150  	switch b {
   151  	case '\t', '\n', '\f', '\r', ' ':
   152  		return true
   153  	}
   154  	return false
   155  }
   156  
   157  // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
   158  func cssEscaper(args ...any) string {
   159  	s, _ := stringify(args...)
   160  	var b strings.Builder
   161  	r, w, written := rune(0), 0, 0
   162  	for i := 0; i < len(s); i += w {
   163  		// See comment in htmlEscaper.
   164  		r, w = utf8.DecodeRuneInString(s[i:])
   165  		var repl string
   166  		switch {
   167  		case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
   168  			repl = cssReplacementTable[r]
   169  		default:
   170  			continue
   171  		}
   172  		if written == 0 {
   173  			b.Grow(len(s))
   174  		}
   175  		b.WriteString(s[written:i])
   176  		b.WriteString(repl)
   177  		written = i + w
   178  		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
   179  			b.WriteByte(' ')
   180  		}
   181  	}
   182  	if written == 0 {
   183  		return s
   184  	}
   185  	b.WriteString(s[written:])
   186  	return b.String()
   187  }
   188  
// cssReplacementTable maps a character, used as the slice index, to the
// CSS escape that cssEscaper writes in its place. Indices without an entry
// hold the empty string, which cssEscaper treats as "no replacement".
var cssReplacementTable = []string{
	0:    `\0`,
	'\t': `\9`,
	'\n': `\a`,
	'\f': `\c`,
	'\r': `\d`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\22`,
	'&':  `\26`,
	'\'': `\27`,
	'(':  `\28`,
	')':  `\29`,
	'+':  `\2b`,
	'/':  `\2f`,
	':':  `\3a`,
	';':  `\3b`,
	'<':  `\3c`,
	'>':  `\3e`,
	// The backslash is the only entry that is not a hex escape.
	'\\': `\\`,
	'{':  `\7b`,
	'}':  `\7d`,
}
   212  
   213  var expressionBytes = []byte("expression")
   214  var mozBindingBytes = []byte("mozbinding")
   215  
   216  // cssValueFilter allows innocuous CSS values in the output including CSS
   217  // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
   218  // (inherit, blue), and colors (#888).
   219  // It filters out unsafe values, such as those that affect token boundaries,
   220  // and anything that might execute scripts.
   221  func cssValueFilter(args ...any) string {
   222  	s, t := stringify(args...)
   223  	if t == contentTypeCSS {
   224  		return s
   225  	}
   226  	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
   227  
   228  	// CSS3 error handling is specified as honoring string boundaries per
   229  	// https://www.w3.org/TR/css3-syntax/#error-handling :
   230  	//     Malformed declarations. User agents must handle unexpected
   231  	//     tokens encountered while parsing a declaration by reading until
   232  	//     the end of the declaration, while observing the rules for
   233  	//     matching pairs of (), [], {}, "", and '', and correctly handling
   234  	//     escapes. For example, a malformed declaration may be missing a
   235  	//     property, colon (:) or value.
   236  	// So we need to make sure that values do not have mismatched bracket
   237  	// or quote characters to prevent the browser from restarting parsing
   238  	// inside a string that might embed JavaScript source.
   239  	for i, c := range b {
   240  		switch c {
   241  		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}', '<', '>':
   242  			return filterFailsafe
   243  		case '-':
   244  			// Disallow <!-- or -->.
   245  			// -- should not appear in valid identifiers.
   246  			if i != 0 && b[i-1] == '-' {
   247  				return filterFailsafe
   248  			}
   249  		default:
   250  			if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
   251  				id = append(id, c)
   252  			}
   253  		}
   254  	}
   255  	id = bytes.ToLower(id)
   256  	if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
   257  		return filterFailsafe
   258  	}
   259  	return string(b)
   260  }
   261
View as plain text