libgo/go/html/template/html.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package template
   6
   7 import (
   8         "bytes"
   9         "fmt"
  10         "strings"
  11         "unicode/utf8"
  12 )
  13
  14 // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
  15 func htmlNospaceEscaper(args ...interface{}) string {
  16         s, t := stringify(args...)
  17         if t == contentTypeHTML {
  18                 return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
  19         }
  20         return htmlReplacer(s, htmlNospaceReplacementTable, false)
  21 }
  22
  23 // attrEscaper escapes for inclusion in quoted attribute values.
  24 func attrEscaper(args ...interface{}) string {
  25         s, t := stringify(args...)
  26         if t == contentTypeHTML {
  27                 return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
  28         }
  29         return htmlReplacer(s, htmlReplacementTable, true)
  30 }
  31
  32 // rcdataEscaper escapes for inclusion in an RCDATA element body.
  33 func rcdataEscaper(args ...interface{}) string {
  34         s, t := stringify(args...)
  35         if t == contentTypeHTML {
  36                 return htmlReplacer(s, htmlNormReplacementTable, true)
  37         }
  38         return htmlReplacer(s, htmlReplacementTable, true)
  39 }
  40
  41 // htmlEscaper escapes for inclusion in HTML text.
  42 func htmlEscaper(args ...interface{}) string {
  43         s, t := stringify(args...)
  44         if t == contentTypeHTML {
  45                 return s
  46         }
  47         return htmlReplacer(s, htmlReplacementTable, true)
  48 }
  49
  50 // htmlReplacementTable contains the runes that need to be escaped
  51 // inside a quoted attribute value or in a text node.
  52 var htmlReplacementTable = []string{
  53         // http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state: "
  54         // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
  55         // CHARACTER character to the current attribute's value.
  56         // "
  57         // and similarly
  58         // http://www.w3.org/TR/html5/tokenization.html#before-attribute-value-state
  59         0:    "\uFFFD",
  60         '"':  "&#34;",
  61         '&':  "&amp;",
  62         '\'': "&#39;",
  63         '+':  "&#43;",
  64         '<':  "&lt;",
  65         '>':  "&gt;",
  66 }
  67
  68 // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
  69 // avoid over-encoding existing entities.
  70 var htmlNormReplacementTable = []string{
  71         0:    "\uFFFD",
  72         '"':  "&#34;",
  73         '\'': "&#39;",
  74         '+':  "&#43;",
  75         '<':  "&lt;",
  76         '>':  "&gt;",
  77 }
  78
  79 // htmlNospaceReplacementTable contains the runes that need to be escaped
  80 // inside an unquoted attribute value.
  81 // The set of runes escaped is the union of the HTML specials and
  82 // those determined by running the JS below in browsers:
  83 // <div id=d></div>
  84 // <script>(function () {
  85 // var a = [], d = document.getElementById("d"), i, c, s;
  86 // for (i = 0; i < 0x10000; ++i) {
  87 //   c = String.fromCharCode(i);
  88 //   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
  89 //   s = d.getElementsByTagName("SPAN")[0];
  90 //   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
  91 // }
  92 // document.write(a.join(", "));
  93 // })()</script>
  94 var htmlNospaceReplacementTable = []string{
  95         0:    "&#xfffd;",
  96         '\t': "&#9;",
  97         '\n': "&#10;",
  98         '\v': "&#11;",
  99         '\f': "&#12;",
 100         '\r': "&#13;",
 101         ' ':  "&#32;",
 102         '"':  "&#34;",
 103         '&':  "&amp;",
 104         '\'': "&#39;",
 105         '+':  "&#43;",
 106         '<':  "&lt;",
 107         '=':  "&#61;",
 108         '>':  "&gt;",
 109         // A parse error in the attribute value (unquoted) and
 110         // before attribute value states.
 111         // Treated as a quoting character by IE.
 112         '`': "&#96;",
 113 }
 114
 115 // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
 116 // without '&' to avoid over-encoding existing entities.
 117 var htmlNospaceNormReplacementTable = []string{
 118         0:    "&#xfffd;",
 119         '\t': "&#9;",
 120         '\n': "&#10;",
 121         '\v': "&#11;",
 122         '\f': "&#12;",
 123         '\r': "&#13;",
 124         ' ':  "&#32;",
 125         '"':  "&#34;",
 126         '\'': "&#39;",
 127         '+':  "&#43;",
 128         '<':  "&lt;",
 129         '=':  "&#61;",
 130         '>':  "&gt;",
 131         // A parse error in the attribute value (unquoted) and
 132         // before attribute value states.
 133         // Treated as a quoting character by IE.
 134         '`': "&#96;",
 135 }
 136
 137 // htmlReplacer returns s with runes replaced according to replacementTable
 138 // and when badRunes is true, certain bad runes are allowed through unescaped.
 139 func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
 140         written, b := 0, new(bytes.Buffer)
 141         for i, r := range s {
 142                 if int(r) < len(replacementTable) {
 143                         if repl := replacementTable[r]; len(repl) != 0 {
 144                                 b.WriteString(s[written:i])
 145                                 b.WriteString(repl)
 146                                 // Valid as long as replacementTable doesn't
 147                                 // include anything above 0x7f.
 148                                 written = i + utf8.RuneLen(r)
 149                         }
 150                 } else if badRunes {
 151                         // No-op.
 152                         // IE does not allow these ranges in unquoted attrs.
 153                 } else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
 154                         fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
 155                         written = i + utf8.RuneLen(r)
 156                 }
 157         }
 158         if written == 0 {
 159                 return s
 160         }
 161         b.WriteString(s[written:])
 162         return b.String()
 163 }
 164
// stripTags takes a snippet of HTML and returns only the text content.
// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
//
// The input is walked with the package's transition functions, starting
// from the zero context. Bytes consumed while the context is in stateText
// or stateRCDATA are copied to the output; everything else is dropped. If
// every step started in one of those two states, the original string is
// returned unchanged.
func stripTags(html string) string {
	var b bytes.Buffer
	// s is the input as bytes, c the current context, i the read offset.
	// allText stays true only while every transition step begins in
	// stateText or stateRCDATA.
	s, c, i, allText := []byte(html), context{}, 0, true
	// Using the transition funcs helps us avoid mangling
	// `<div title="1>2">` or `I <3 Ponies!`.
	for i != len(s) {
		if c.delim == delimNone {
			st := c.state
			// Use RCDATA instead of parsing into JS or CSS styles.
			if c.element != elementNone && !isInTag(st) {
				st = stateRCDATA
			}
			// d is the context after the step; nread is how many bytes of
			// s[i:] the step consumed.
			d, nread := transitionFunc[st](c, s[i:])
			i1 := i + nread
			if c.state == stateText || c.state == stateRCDATA {
				// Emit text up to the start of the tag or comment.
				j := i1
				if d.state != c.state {
					// The step left the text state: scan backwards over
					// the consumed bytes for the last '<' and cut the
					// emitted text there. If none is found, everything
					// consumed is emitted.
					for j1 := j - 1; j1 >= i; j1-- {
						if s[j1] == '<' {
							j = j1
							break
						}
					}
				}
				b.Write(s[i:j])
			} else {
				allText = false
			}
			c, i = d, i1
			continue
		}
		// c.delim is set: skip ahead to the first byte listed in
		// delimEnds for this delimiter.
		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
		if i1 < i {
			// IndexAny returned -1: no terminator in the rest of the
			// input, so stop scanning.
			break
		}
		if c.delim != delimSpaceOrTagEnd {
			// Consume any quote.
			i1++
		}
		// Resume in stateTag, keeping only the current element.
		c, i = context{state: stateTag, element: c.element}, i1
	}
	if allText {
		// Nothing was dropped, so hand back the input without copying.
		return html
	} else if c.state == stateText || c.state == stateRCDATA {
		// Flush whatever follows the read offset when ending in a text state.
		b.Write(s[i:])
	}
	return b.String()
}
 216
 217 // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
 218 // a known-safe HTML attribute.
 219 func htmlNameFilter(args ...interface{}) string {
 220         s, t := stringify(args...)
 221         if t == contentTypeHTMLAttr {
 222                 return s
 223         }
 224         if len(s) == 0 {
 225                 // Avoid violation of structure preservation.
 226                 // <input checked {{.K}}={{.V}}>.
 227                 // Without this, if .K is empty then .V is the value of
 228                 // checked, but otherwise .V is the value of the attribute
 229                 // named .K.
 230                 return filterFailsafe
 231         }
 232         s = strings.ToLower(s)
 233         if t := attrType(s); t != contentTypePlain {
 234                 // TODO: Split attr and element name part filters so we can whitelist
 235                 // attributes.
 236                 return filterFailsafe
 237         }
 238         for _, r := range s {
 239                 switch {
 240                 case '0' <= r && r <= '9':
 241                 case 'a' <= r && r <= 'z':
 242                 default:
 243                         return filterFailsafe
 244                 }
 245         }
 246         return s
 247 }
 248
 249 // commentEscaper returns the empty string regardless of input.
 250 // Comment content does not correspond to any parsed structure or
 251 // human-readable content, so the simplest and most secure policy is to drop
 252 // content interpolated into comments.
 253 // This approach is equally valid whether or not static comment content is
 254 // removed from the template.
 255 func commentEscaper(args ...interface{}) string {
 256         return ""
 257 }