libgo/go/html/template/css.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package template
   6
   7 import (
   8         "bytes"
   9         "fmt"
  10         "unicode"
  11         "unicode/utf8"
  12 )
  13
  14 // endsWithCSSKeyword returns whether b ends with an ident that
  15 // case-insensitively matches the lower-case kw.
  16 func endsWithCSSKeyword(b []byte, kw string) bool {
  17         i := len(b) - len(kw)
  18         if i < 0 {
  19                 // Too short.
  20                 return false
  21         }
  22         if i != 0 {
  23                 r, _ := utf8.DecodeLastRune(b[:i])
  24                 if isCSSNmchar(r) {
  25                         // Too long.
  26                         return false
  27                 }
  28         }
  29         // Many CSS keywords, such as "!important" can have characters encoded,
  30         // but the URI production does not allow that according to
  31         // http://www.w3.org/TR/css3-syntax/#TOK-URI
  32         // This does not attempt to recognize encoded keywords. For example,
  33         // given "\75\72\6c" and "url" this return false.
  34         return string(bytes.ToLower(b[i:])) == kw
  35 }
  36
  37 // isCSSNmchar returns whether rune is allowed anywhere in a CSS identifier.
  38 func isCSSNmchar(r rune) bool {
  39         // Based on the CSS3 nmchar production but ignores multi-rune escape
  40         // sequences.
  41         // http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
  42         return 'a' <= r && r <= 'z' ||
  43                 'A' <= r && r <= 'Z' ||
  44                 '0' <= r && r <= '9' ||
  45                 r == '-' ||
  46                 r == '_' ||
  47                 // Non-ASCII cases below.
  48                 0x80 <= r && r <= 0xd7ff ||
  49                 0xe000 <= r && r <= 0xfffd ||
  50                 0x10000 <= r && r <= 0x10ffff
  51 }
  52
  53 // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
  54 // If there is no change, it returns the input, otherwise it returns a slice
  55 // backed by a new array.
  56 // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
  57 func decodeCSS(s []byte) []byte {
  58         i := bytes.IndexByte(s, '\\')
  59         if i == -1 {
  60                 return s
  61         }
  62         // The UTF-8 sequence for a codepoint is never longer than 1 + the
  63         // number hex digits need to represent that codepoint, so len(s) is an
  64         // upper bound on the output length.
  65         b := make([]byte, 0, len(s))
  66         for len(s) != 0 {
  67                 i := bytes.IndexByte(s, '\\')
  68                 if i == -1 {
  69                         i = len(s)
  70                 }
  71                 b, s = append(b, s[:i]...), s[i:]
  72                 if len(s) < 2 {
  73                         break
  74                 }
  75                 // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
  76                 // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
  77                 if isHex(s[1]) {
  78                         // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
  79                         //   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
  80                         j := 2
  81                         for j < len(s) && j < 7 && isHex(s[j]) {
  82                                 j++
  83                         }
  84                         r := hexDecode(s[1:j])
  85                         if r > unicode.MaxRune {
  86                                 r, j = r/16, j-1
  87                         }
  88                         n := utf8.EncodeRune(b[len(b):cap(b)], r)
  89                         // The optional space at the end allows a hex
  90                         // sequence to be followed by a literal hex.
  91                         // string(decodeCSS([]byte(`\A B`))) == "\nB"
  92                         b, s = b[:len(b)+n], skipCSSSpace(s[j:])
  93                 } else {
  94                         // `\\` decodes to `\` and `\"` to `"`.
  95                         _, n := utf8.DecodeRune(s[1:])
  96                         b, s = append(b, s[1:1+n]...), s[1+n:]
  97                 }
  98         }
  99         return b
 100 }
 101
 102 // isHex returns whether the given character is a hex digit.
 103 func isHex(c byte) bool {
 104         return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
 105 }
 106
 107 // hexDecode decodes a short hex digit sequence: "10" -> 16.
 108 func hexDecode(s []byte) rune {
 109         n := '\x00'
 110         for _, c := range s {
 111                 n <<= 4
 112                 switch {
 113                 case '0' <= c && c <= '9':
 114                         n |= rune(c - '0')
 115                 case 'a' <= c && c <= 'f':
 116                         n |= rune(c-'a') + 10
 117                 case 'A' <= c && c <= 'F':
 118                         n |= rune(c-'A') + 10
 119                 default:
 120                         panic(fmt.Sprintf("Bad hex digit in %q", s))
 121                 }
 122         }
 123         return n
 124 }
 125
 126 // skipCSSSpace returns a suffix of c, skipping over a single space.
 127 func skipCSSSpace(c []byte) []byte {
 128         if len(c) == 0 {
 129                 return c
 130         }
 131         // wc ::= #x9 | #xA | #xC | #xD | #x20
 132         switch c[0] {
 133         case '\t', '\n', '\f', ' ':
 134                 return c[1:]
 135         case '\r':
 136                 // This differs from CSS3's wc production because it contains a
 137                 // probable spec error whereby wc contains all the single byte
 138                 // sequences in nl (newline) but not CRLF.
 139                 if len(c) >= 2 && c[1] == '\n' {
 140                         return c[2:]
 141                 }
 142                 return c[1:]
 143         }
 144         return c
 145 }
 146
 147 // isCSSSpace returns whether b is a CSS space char as defined in wc.
 148 func isCSSSpace(b byte) bool {
 149         switch b {
 150         case '\t', '\n', '\f', '\r', ' ':
 151                 return true
 152         }
 153         return false
 154 }
 155
 156 // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
 157 func cssEscaper(args ...interface{}) string {
 158         s, _ := stringify(args...)
 159         var b bytes.Buffer
 160         written := 0
 161         for i, r := range s {
 162                 var repl string
 163                 switch r {
 164                 case 0:
 165                         repl = `\0`
 166                 case '\t':
 167                         repl = `\9`
 168                 case '\n':
 169                         repl = `\a`
 170                 case '\f':
 171                         repl = `\c`
 172                 case '\r':
 173                         repl = `\d`
 174                 // Encode HTML specials as hex so the output can be embedded
 175                 // in HTML attributes without further encoding.
 176                 case '"':
 177                         repl = `\22`
 178                 case '&':
 179                         repl = `\26`
 180                 case '\'':
 181                         repl = `\27`
 182                 case '(':
 183                         repl = `\28`
 184                 case ')':
 185                         repl = `\29`
 186                 case '+':
 187                         repl = `\2b`
 188                 case '/':
 189                         repl = `\2f`
 190                 case ':':
 191                         repl = `\3a`
 192                 case ';':
 193                         repl = `\3b`
 194                 case '<':
 195                         repl = `\3c`
 196                 case '>':
 197                         repl = `\3e`
 198                 case '\\':
 199                         repl = `\\`
 200                 case '{':
 201                         repl = `\7b`
 202                 case '}':
 203                         repl = `\7d`
 204                 default:
 205                         continue
 206                 }
 207                 b.WriteString(s[written:i])
 208                 b.WriteString(repl)
 209                 written = i + utf8.RuneLen(r)
 210                 if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
 211                         b.WriteByte(' ')
 212                 }
 213         }
 214         if written == 0 {
 215                 return s
 216         }
 217         b.WriteString(s[written:])
 218         return b.String()
 219 }
 220
 221 var expressionBytes = []byte("expression")
 222 var mozBindingBytes = []byte("mozbinding")
 223
 224 // cssValueFilter allows innocuous CSS values in the output including CSS
 225 // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
 226 // (inherit, blue), and colors (#888).
 227 // It filters out unsafe values, such as those that affect token boundaries,
 228 // and anything that might execute scripts.
 229 func cssValueFilter(args ...interface{}) string {
 230         s, t := stringify(args...)
 231         if t == contentTypeCSS {
 232                 return s
 233         }
 234         b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
 235
 236         // CSS3 error handling is specified as honoring string boundaries per
 237         // http://www.w3.org/TR/css3-syntax/#error-handling :
 238         //     Malformed declarations. User agents must handle unexpected
 239         //     tokens encountered while parsing a declaration by reading until
 240         //     the end of the declaration, while observing the rules for
 241         //     matching pairs of (), [], {}, "", and '', and correctly handling
 242         //     escapes. For example, a malformed declaration may be missing a
 243         //     property, colon (:) or value.
 244         // So we need to make sure that values do not have mismatched bracket
 245         // or quote characters to prevent the browser from restarting parsing
 246         // inside a string that might embed JavaScript source.
 247         for i, c := range b {
 248                 switch c {
 249                 case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
 250                         return filterFailsafe
 251                 case '-':
 252                         // Disallow <!-- or -->.
 253                         // -- should not appear in valid identifiers.
 254                         if i != 0 && b[i-1] == '-' {
 255                                 return filterFailsafe
 256                         }
 257                 default:
 258                         if c < 0x80 && isCSSNmchar(rune(c)) {
 259                                 id = append(id, c)
 260                         }
 261                 }
 262         }
 263         id = bytes.ToLower(id)
 264         if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
 265                 return filterFailsafe
 266         }
 267         return string(b)
 268 }