Imported Upstream version 4.8.1
[platform/upstream/gcc48.git] / libgo / go / exp / html / parse.go
index 04f4ae7..0ff1574 100644 (file)
@@ -5,6 +5,9 @@
 package html
 
 import (
+       "errors"
+       a "exp/html/atom"
+       "fmt"
        "io"
        "strings"
 )
@@ -16,9 +19,8 @@ type parser struct {
        tokenizer *Tokenizer
        // tok is the most recently read token.
        tok Token
-       // Self-closing tags like <hr/> are re-interpreted as a two-token sequence:
-       // <hr> followed by </hr>. hasSelfClosingToken is true if we have just read
-       // the synthetic start tag and the next one due is the matching end tag.
+       // Self-closing tags like <hr/> are treated as start tags, except that
+       // hasSelfClosingToken is set while they are being processed.
        hasSelfClosingToken bool
        // doc is the document root element.
        doc *Node
@@ -39,6 +41,8 @@ type parser struct {
        fosterParenting bool
        // quirks is whether the parser is operating in "quirks mode."
        quirks bool
+       // fragment is whether the parser is parsing an HTML fragment.
+       fragment bool
        // context is the context element when parsing an HTML fragment
        // (section 12.4).
        context *Node
@@ -53,10 +57,10 @@ func (p *parser) top() *Node {
 
 // Stop tags for use in popUntil. These come from section 12.2.3.2.
 var (
-       defaultScopeStopTags = map[string][]string{
-               "":     {"applet", "caption", "html", "table", "td", "th", "marquee", "object"},
-               "math": {"annotation-xml", "mi", "mn", "mo", "ms", "mtext"},
-               "svg":  {"desc", "foreignObject", "title"},
+       defaultScopeStopTags = map[string][]a.Atom{
+               "":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object},
+               "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
+               "svg":  {a.Desc, a.ForeignObject, a.Title},
        }
 )
 
@@ -68,6 +72,8 @@ const (
        buttonScope
        tableScope
        tableRowScope
+       tableBodyScope
+       selectScope
 )
 
 // popUntil pops the stack of open elements at the highest element whose tag
@@ -87,7 +93,7 @@ const (
 // no higher element in the stack that was also in the stop tags). For example,
 // popUntil(tableScope, "table") returns true and leaves:
 // ["html", "body", "font"]
-func (p *parser) popUntil(s scope, matchTags ...string) bool {
+func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
        if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
                p.oe = p.oe[:i]
                return true
@@ -98,12 +104,12 @@ func (p *parser) popUntil(s scope, matchTags ...string) bool {
 // indexOfElementInScope returns the index in p.oe of the highest element whose
 // tag is in matchTags that is in scope. If no matching element is in scope, it
 // returns -1.
-func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
+func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
        for i := len(p.oe) - 1; i >= 0; i-- {
-               tag := p.oe[i].Data
+               tagAtom := p.oe[i].DataAtom
                if p.oe[i].Namespace == "" {
                        for _, t := range matchTags {
-                               if t == tag {
+                               if t == tagAtom {
                                        return i
                                }
                        }
@@ -111,15 +117,19 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
                        case defaultScope:
                                // No-op.
                        case listItemScope:
-                               if tag == "ol" || tag == "ul" {
+                               if tagAtom == a.Ol || tagAtom == a.Ul {
                                        return -1
                                }
                        case buttonScope:
-                               if tag == "button" {
+                               if tagAtom == a.Button {
                                        return -1
                                }
                        case tableScope:
-                               if tag == "html" || tag == "table" {
+                               if tagAtom == a.Html || tagAtom == a.Table {
+                                       return -1
+                               }
+                       case selectScope:
+                               if tagAtom != a.Optgroup && tagAtom != a.Option {
                                        return -1
                                }
                        default:
@@ -129,7 +139,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
                switch s {
                case defaultScope, listItemScope, buttonScope:
                        for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
-                               if t == tag {
+                               if t == tagAtom {
                                        return -1
                                }
                        }
@@ -140,7 +150,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
 
 // elementInScope is like popUntil, except that it doesn't modify the stack of
 // open elements.
-func (p *parser) elementInScope(s scope, matchTags ...string) bool {
+func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
        return p.indexOfElementInScope(s, matchTags...) != -1
 }
 
@@ -148,15 +158,20 @@ func (p *parser) elementInScope(s scope, matchTags ...string) bool {
 // scope-defined element is found.
 func (p *parser) clearStackToContext(s scope) {
        for i := len(p.oe) - 1; i >= 0; i-- {
-               tag := p.oe[i].Data
+               tagAtom := p.oe[i].DataAtom
                switch s {
                case tableScope:
-                       if tag == "html" || tag == "table" {
+                       if tagAtom == a.Html || tagAtom == a.Table {
                                p.oe = p.oe[:i+1]
                                return
                        }
                case tableRowScope:
-                       if tag == "html" || tag == "tr" {
+                       if tagAtom == a.Html || tagAtom == a.Tr {
+                               p.oe = p.oe[:i+1]
+                               return
+                       }
+               case tableBodyScope:
+                       if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
                                p.oe = p.oe[:i+1]
                                return
                        }
@@ -166,13 +181,38 @@ func (p *parser) clearStackToContext(s scope) {
        }
 }
 
+// generateImpliedEndTags pops nodes off the stack of open elements as long as
+// the top node is a dd, dt, li, option, optgroup, p, rp, or rt element. Popping
+// stops at the first such node whose tag name is in exceptions; it is kept.
+func (p *parser) generateImpliedEndTags(exceptions ...string) {
+       var i int
+loop:
+       for i = len(p.oe) - 1; i >= 0; i-- {
+               n := p.oe[i]
+               if n.Type == ElementNode {
+                       switch n.DataAtom {
+                       case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
+                               for _, except := range exceptions {
+                                       if n.Data == except {
+                                               break loop
+                                       }
+                               }
+                               continue
+                       }
+               }
+               break
+       }
+
+       p.oe = p.oe[:i+1]
+}
+
 // addChild adds a child node n to the top element, and pushes n onto the stack
 // of open elements if it is an element node.
 func (p *parser) addChild(n *Node) {
-       if p.fosterParenting {
+       if p.shouldFosterParent() {
                p.fosterParent(n)
        } else {
-               p.top().Add(n)
+               p.top().AppendChild(n)
        }
 
        if n.Type == ElementNode {
@@ -180,14 +220,25 @@ func (p *parser) addChild(n *Node) {
        }
 }
 
+// shouldFosterParent reports whether the next node to be added should be foster
+// parented: p.fosterParenting is set and p.top() is a table, tbody, tfoot, thead or tr.
+func (p *parser) shouldFosterParent() bool {
+       if p.fosterParenting {
+               switch p.top().DataAtom {
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       return true
+               }
+       }
+       return false
+}
+
 // fosterParent adds a child node according to the foster parenting rules.
 // Section 12.2.5.3, "foster parenting".
 func (p *parser) fosterParent(n *Node) {
-       p.fosterParenting = false
-       var table, parent *Node
+       var table, parent, prev *Node
        var i int
        for i = len(p.oe) - 1; i >= 0; i-- {
-               if p.oe[i].Data == "table" {
+               if p.oe[i].DataAtom == a.Table {
                        table = p.oe[i]
                        break
                }
@@ -203,35 +254,37 @@ func (p *parser) fosterParent(n *Node) {
                parent = p.oe[i-1]
        }
 
-       var child *Node
-       for i, child = range parent.Child {
-               if child == table {
-                       break
-               }
+       if table != nil {
+               prev = table.PrevSibling
+       } else {
+               prev = parent.LastChild
        }
-
-       if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode {
-               parent.Child[i-1].Data += n.Data
+       if prev != nil && prev.Type == TextNode && n.Type == TextNode {
+               prev.Data += n.Data
                return
        }
 
-       if i == len(parent.Child) {
-               parent.Add(n)
-       } else {
-               // Insert n into parent.Child at index i.
-               parent.Child = append(parent.Child[:i+1], parent.Child[i:]...)
-               parent.Child[i] = n
-               n.Parent = parent
-       }
+       parent.InsertBefore(n, table)
 }
 
 // addText adds text to the preceding node if it is a text node, or else it
 // calls addChild with a new text node.
 func (p *parser) addText(text string) {
-       // TODO: distinguish whitespace text from others.
+       if text == "" {
+               return
+       }
+
+       if p.shouldFosterParent() {
+               p.fosterParent(&Node{
+                       Type: TextNode,
+                       Data: text,
+               })
+               return
+       }
+
        t := p.top()
-       if i := len(t.Child); i > 0 && t.Child[i-1].Type == TextNode {
-               t.Child[i-1].Data += text
+       if n := t.LastChild; n != nil && n.Type == TextNode {
+               n.Data += text
                return
        }
        p.addChild(&Node{
@@ -240,20 +293,61 @@ func (p *parser) addText(text string) {
        })
 }
 
-// addElement calls addChild with an element node.
-func (p *parser) addElement(tag string, attr []Attribute) {
+// addElement calls addChild with an element node whose DataAtom, Data and Attr come from the current token, p.tok.
+func (p *parser) addElement() {
        p.addChild(&Node{
-               Type: ElementNode,
-               Data: tag,
-               Attr: attr,
+               Type:     ElementNode,
+               DataAtom: p.tok.DataAtom,
+               Data:     p.tok.Data,
+               Attr:     p.tok.Attr,
        })
 }
 
 // Section 12.2.3.3.
-func (p *parser) addFormattingElement(tag string, attr []Attribute) {
-       p.addElement(tag, attr)
+func (p *parser) addFormattingElement() {
+       tagAtom, attr := p.tok.DataAtom, p.tok.Attr
+       p.addElement()
+
+       // Implement the Noah's Ark clause, but with three per family instead of two.
+       identicalElements := 0
+findIdenticalElements:
+       for i := len(p.afe) - 1; i >= 0; i-- {
+               n := p.afe[i]
+               if n.Type == scopeMarkerNode {
+                       break
+               }
+               if n.Type != ElementNode {
+                       continue
+               }
+               if n.Namespace != "" {
+                       continue
+               }
+               if n.DataAtom != tagAtom {
+                       continue
+               }
+               if len(n.Attr) != len(attr) {
+                       continue
+               }
+       compareAttributes:
+               for _, t0 := range n.Attr {
+                       for _, t1 := range attr {
+                               if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
+                                       // Found a match for this attribute, continue with the next attribute.
+                                       continue compareAttributes
+                               }
+                       }
+                       // If we get here, there is no attribute in attr that matches t0.
+                       // Therefore the element is not identical to the new one.
+                       continue findIdenticalElements
+               }
+
+               identicalElements++
+               if identicalElements >= 3 {
+                       p.afe.remove(n)
+               }
+       }
+
        p.afe = append(p.afe, p.top())
-       // TODO.
 }
 
 // Section 12.2.3.3.
@@ -295,27 +389,6 @@ func (p *parser) reconstructActiveFormattingElements() {
        }
 }
 
-// read reads the next token. This is usually from the tokenizer, but it may
-// be the synthesized end tag implied by a self-closing tag.
-func (p *parser) read() error {
-       if p.hasSelfClosingToken {
-               p.hasSelfClosingToken = false
-               p.tok.Type = EndTagToken
-               p.tok.Attr = nil
-               return nil
-       }
-       p.tokenizer.Next()
-       p.tok = p.tokenizer.Token()
-       switch p.tok.Type {
-       case ErrorToken:
-               return p.tokenizer.Err()
-       case SelfClosingTagToken:
-               p.hasSelfClosingToken = true
-               p.tok.Type = StartTagToken
-       }
-       return nil
-}
-
 // Section 12.2.4.
 func (p *parser) acknowledgeSelfClosingTag() {
        p.hasSelfClosingToken = false
@@ -345,28 +418,28 @@ func (p *parser) resetInsertionMode() {
                        n = p.context
                }
 
-               switch n.Data {
-               case "select":
+               switch n.DataAtom {
+               case a.Select:
                        p.im = inSelectIM
-               case "td", "th":
+               case a.Td, a.Th:
                        p.im = inCellIM
-               case "tr":
+               case a.Tr:
                        p.im = inRowIM
-               case "tbody", "thead", "tfoot":
+               case a.Tbody, a.Thead, a.Tfoot:
                        p.im = inTableBodyIM
-               case "caption":
+               case a.Caption:
                        p.im = inCaptionIM
-               case "colgroup":
+               case a.Colgroup:
                        p.im = inColumnGroupIM
-               case "table":
+               case a.Table:
                        p.im = inTableIM
-               case "head":
+               case a.Head:
                        p.im = inBodyIM
-               case "body":
+               case a.Body:
                        p.im = inBodyIM
-               case "frameset":
+               case a.Frameset:
                        p.im = inFramesetIM
-               case "html":
+               case a.Html:
                        p.im = beforeHeadIM
                default:
                        continue
@@ -388,14 +461,14 @@ func initialIM(p *parser) bool {
                        return true
                }
        case CommentToken:
-               p.doc.Add(&Node{
+               p.doc.AppendChild(&Node{
                        Type: CommentNode,
                        Data: p.tok.Data,
                })
                return true
        case DoctypeToken:
                n, quirks := parseDoctype(p.tok.Data)
-               p.doc.Add(n)
+               p.doc.AppendChild(n)
                p.quirks = quirks
                p.im = beforeHTMLIM
                return true
@@ -408,6 +481,9 @@ func initialIM(p *parser) bool {
 // Section 12.2.5.4.2.
 func beforeHTMLIM(p *parser) bool {
        switch p.tok.Type {
+       case DoctypeToken:
+               // Ignore the token.
+               return true
        case TextToken:
                p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
                if len(p.tok.Data) == 0 {
@@ -415,65 +491,58 @@ func beforeHTMLIM(p *parser) bool {
                        return true
                }
        case StartTagToken:
-               if p.tok.Data == "html" {
-                       p.addElement(p.tok.Data, p.tok.Attr)
+               if p.tok.DataAtom == a.Html {
+                       p.addElement()
                        p.im = beforeHeadIM
                        return true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "head", "body", "html", "br":
-                       // Drop down to creating an implied <html> tag.
+               switch p.tok.DataAtom {
+               case a.Head, a.Body, a.Html, a.Br:
+                       p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
+                       return false
                default:
                        // Ignore the token.
                        return true
                }
        case CommentToken:
-               p.doc.Add(&Node{
+               p.doc.AppendChild(&Node{
                        Type: CommentNode,
                        Data: p.tok.Data,
                })
                return true
        }
-       // Create an implied <html> tag.
-       p.addElement("html", nil)
-       p.im = beforeHeadIM
+       p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
        return false
 }
 
 // Section 12.2.5.4.3.
 func beforeHeadIM(p *parser) bool {
-       var (
-               add     bool
-               attr    []Attribute
-               implied bool
-       )
        switch p.tok.Type {
-       case ErrorToken:
-               implied = true
        case TextToken:
                p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
                if len(p.tok.Data) == 0 {
                        // It was all whitespace, so ignore it.
                        return true
                }
-               implied = true
        case StartTagToken:
-               switch p.tok.Data {
-               case "head":
-                       add = true
-                       attr = p.tok.Attr
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Head:
+                       p.addElement()
+                       p.head = p.top()
+                       p.im = inHeadIM
+                       return true
+               case a.Html:
                        return inBodyIM(p)
-               default:
-                       implied = true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "head", "body", "html", "br":
-                       implied = true
+               switch p.tok.DataAtom {
+               case a.Head, a.Body, a.Html, a.Br:
+                       p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
+                       return false
                default:
                        // Ignore the token.
+                       return true
                }
        case CommentToken:
                p.addChild(&Node{
@@ -481,24 +550,18 @@ func beforeHeadIM(p *parser) bool {
                        Data: p.tok.Data,
                })
                return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
        }
-       if add || implied {
-               p.addElement("head", attr)
-               p.head = p.top()
-       }
-       p.im = inHeadIM
-       return !implied
+
+       p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
+       return false
 }
 
 // Section 12.2.5.4.4.
 func inHeadIM(p *parser) bool {
-       var (
-               pop     bool
-               implied bool
-       )
        switch p.tok.Type {
-       case ErrorToken:
-               implied = true
        case TextToken:
                s := strings.TrimLeft(p.tok.Data, whitespace)
                if len(s) < len(p.tok.Data) {
@@ -509,32 +572,36 @@ func inHeadIM(p *parser) bool {
                        }
                        p.tok.Data = s
                }
-               implied = true
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        return inBodyIM(p)
-               case "base", "basefont", "bgsound", "command", "link", "meta":
-                       p.addElement(p.tok.Data, p.tok.Attr)
+               case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
+                       p.addElement()
                        p.oe.pop()
                        p.acknowledgeSelfClosingTag()
-               case "script", "title", "noscript", "noframes", "style":
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       return true
+               case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
+                       p.addElement()
                        p.setOriginalIM()
                        p.im = textIM
                        return true
-               case "head":
+               case a.Head:
                        // Ignore the token.
                        return true
-               default:
-                       implied = true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "head":
-                       pop = true
-               case "body", "html", "br":
-                       implied = true
+               switch p.tok.DataAtom {
+               case a.Head:
+                       n := p.oe.pop()
+                       if n.DataAtom != a.Head {
+                               panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
+                       }
+                       p.im = afterHeadIM
+                       return true
+               case a.Body, a.Html, a.Br:
+                       p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
+                       return false
                default:
                        // Ignore the token.
                        return true
@@ -545,30 +612,18 @@ func inHeadIM(p *parser) bool {
                        Data: p.tok.Data,
                })
                return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
        }
-       if pop || implied {
-               n := p.oe.pop()
-               if n.Data != "head" {
-                       panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
-               }
-               p.im = afterHeadIM
-               return !implied
-       }
-       return true
+
+       p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
+       return false
 }
 
 // Section 12.2.5.4.6.
 func afterHeadIM(p *parser) bool {
-       var (
-               add        bool
-               attr       []Attribute
-               framesetOK bool
-               implied    bool
-       )
        switch p.tok.Type {
-       case ErrorToken:
-               implied = true
-               framesetOK = true
        case TextToken:
                s := strings.TrimLeft(p.tok.Data, whitespace)
                if len(s) < len(p.tok.Data) {
@@ -579,36 +634,31 @@ func afterHeadIM(p *parser) bool {
                        }
                        p.tok.Data = s
                }
-               implied = true
-               framesetOK = true
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
-                       // TODO.
-               case "body":
-                       add = true
-                       attr = p.tok.Attr
-                       framesetOK = false
-               case "frameset":
-                       p.addElement(p.tok.Data, p.tok.Attr)
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Body:
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inBodyIM
+                       return true
+               case a.Frameset:
+                       p.addElement()
                        p.im = inFramesetIM
                        return true
-               case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title":
+               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
                        p.oe = append(p.oe, p.head)
-                       defer p.oe.pop()
+                       defer p.oe.remove(p.head)
                        return inHeadIM(p)
-               case "head":
+               case a.Head:
                        // Ignore the token.
                        return true
-               default:
-                       implied = true
-                       framesetOK = true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "body", "html", "br":
-                       implied = true
-                       framesetOK = true
+               switch p.tok.DataAtom {
+               case a.Body, a.Html, a.Br:
+                       // Drop down to creating an implied <body> tag.
                default:
                        // Ignore the token.
                        return true
@@ -619,13 +669,14 @@ func afterHeadIM(p *parser) bool {
                        Data: p.tok.Data,
                })
                return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
        }
-       if add || implied {
-               p.addElement("body", attr)
-               p.framesetOK = framesetOK
-       }
-       p.im = inBodyIM
-       return !implied
+
+       p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
+       p.framesetOK = true
+       return false
 }
 
 // copyAttributes copies attributes of src not found on dst to dst.
@@ -634,13 +685,13 @@ func copyAttributes(dst *Node, src Token) {
                return
        }
        attr := map[string]string{}
-       for _, a := range dst.Attr {
-               attr[a.Key] = a.Val
+       for _, t := range dst.Attr {
+               attr[t.Key] = t.Val
        }
-       for _, a := range src.Attr {
-               if _, ok := attr[a.Key]; !ok {
-                       dst.Attr = append(dst.Attr, a)
-                       attr[a.Key] = a.Val
+       for _, t := range src.Attr {
+               if _, ok := attr[t.Key]; !ok {
+                       dst.Attr = append(dst.Attr, t)
+                       attr[t.Key] = t.Val
                }
        }
 }
@@ -649,106 +700,85 @@ func copyAttributes(dst *Node, src Token) {
 func inBodyIM(p *parser) bool {
        switch p.tok.Type {
        case TextToken:
-               switch n := p.oe.top(); n.Data {
-               case "pre", "listing", "textarea":
-                       if len(n.Child) == 0 {
+               d := p.tok.Data
+               switch n := p.oe.top(); n.DataAtom {
+               case a.Pre, a.Listing:
+                       if n.FirstChild == nil {
                                // Ignore a newline at the start of a <pre> block.
-                               d := p.tok.Data
                                if d != "" && d[0] == '\r' {
                                        d = d[1:]
                                }
                                if d != "" && d[0] == '\n' {
                                        d = d[1:]
                                }
-                               if d == "" {
-                                       return true
-                               }
-                               p.tok.Data = d
                        }
                }
+               d = strings.Replace(d, "\x00", "", -1)
+               if d == "" {
+                       return true
+               }
                p.reconstructActiveFormattingElements()
-               p.addText(p.tok.Data)
-               p.framesetOK = false
+               p.addText(d)
+               if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
+                       // There were non-whitespace characters inserted.
+                       p.framesetOK = false
+               }
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        copyAttributes(p.oe[0], p.tok)
-               case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
-                       p.popUntil(buttonScope, "p")
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "h1", "h2", "h3", "h4", "h5", "h6":
-                       p.popUntil(buttonScope, "p")
-                       switch n := p.top(); n.Data {
-                       case "h1", "h2", "h3", "h4", "h5", "h6":
-                               p.oe.pop()
-                       }
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "a":
-                       for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
-                               if n := p.afe[i]; n.Type == ElementNode && n.Data == "a" {
-                                       p.inBodyEndTagFormatting("a")
-                                       p.oe.remove(n)
-                                       p.afe.remove(n)
-                                       break
+               case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
+                       return inHeadIM(p)
+               case a.Body:
+                       if len(p.oe) >= 2 {
+                               body := p.oe[1]
+                               if body.Type == ElementNode && body.DataAtom == a.Body {
+                                       p.framesetOK = false
+                                       copyAttributes(body, p.tok)
                                }
                        }
-                       p.reconstructActiveFormattingElements()
-                       p.addFormattingElement(p.tok.Data, p.tok.Attr)
-               case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u":
-                       p.reconstructActiveFormattingElements()
-                       p.addFormattingElement(p.tok.Data, p.tok.Attr)
-               case "nobr":
-                       p.reconstructActiveFormattingElements()
-                       if p.elementInScope(defaultScope, "nobr") {
-                               p.inBodyEndTagFormatting("nobr")
-                               p.reconstructActiveFormattingElements()
+               case a.Frameset:
+                       if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
+                               // Ignore the token.
+                               return true
                        }
-                       p.addFormattingElement(p.tok.Data, p.tok.Attr)
-               case "applet", "marquee", "object":
-                       p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.afe = append(p.afe, &scopeMarker)
-                       p.framesetOK = false
-               case "area", "br", "embed", "img", "input", "keygen", "wbr":
-                       p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-                       p.framesetOK = false
-               case "table":
-                       if !p.quirks {
-                               p.popUntil(buttonScope, "p")
+                       body := p.oe[1]
+                       if body.Parent != nil {
+                               body.Parent.RemoveChild(body)
                        }
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.framesetOK = false
-                       p.im = inTableIM
+                       p.oe = p.oe[:1]
+                       p.addElement()
+                       p.im = inFramesetIM
                        return true
-               case "hr":
-                       p.popUntil(buttonScope, "p")
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-                       p.framesetOK = false
-               case "select":
-                       p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
+               case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+                       p.popUntil(buttonScope, a.P)
+                       switch n := p.top(); n.DataAtom {
+                       case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+                               p.oe.pop()
+                       }
+                       p.addElement()
+               case a.Pre, a.Listing:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+                       // The newline, if any, will be dealt with by the TextToken case.
                        p.framesetOK = false
-                       p.im = inSelectIM
-                       return true
-               case "form":
+               case a.Form:
                        if p.form == nil {
-                               p.popUntil(buttonScope, "p")
-                               p.addElement(p.tok.Data, p.tok.Attr)
+                               p.popUntil(buttonScope, a.P)
+                               p.addElement()
                                p.form = p.top()
                        }
-               case "li":
+               case a.Li:
                        p.framesetOK = false
                        for i := len(p.oe) - 1; i >= 0; i-- {
                                node := p.oe[i]
-                               switch node.Data {
-                               case "li":
-                                       p.popUntil(listItemScope, "li")
-                               case "address", "div", "p":
+                               switch node.DataAtom {
+                               case a.Li:
+                                       p.oe = p.oe[:i]
+                               case a.Address, a.Div, a.P:
                                        continue
                                default:
                                        if !isSpecialElement(node) {
@@ -757,16 +787,16 @@ func inBodyIM(p *parser) bool {
                                }
                                break
                        }
-                       p.popUntil(buttonScope, "p")
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "dd", "dt":
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.Dd, a.Dt:
                        p.framesetOK = false
                        for i := len(p.oe) - 1; i >= 0; i-- {
                                node := p.oe[i]
-                               switch node.Data {
-                               case "dd", "dt":
+                               switch node.DataAtom {
+                               case a.Dd, a.Dt:
                                        p.oe = p.oe[:i]
-                               case "address", "div", "p":
+                               case a.Address, a.Div, a.P:
                                        continue
                                default:
                                        if !isSpecialElement(node) {
@@ -775,49 +805,81 @@ func inBodyIM(p *parser) bool {
                                }
                                break
                        }
-                       p.popUntil(buttonScope, "p")
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "plaintext":
-                       p.popUntil(buttonScope, "p")
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "button":
-                       p.popUntil(defaultScope, "button")
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.Plaintext:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.Button:
+                       p.popUntil(defaultScope, a.Button)
                        p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.framesetOK = false
-               case "optgroup", "option":
-                       if p.top().Data == "option" {
-                               p.oe.pop()
-                       }
-                       p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "body":
-                       if len(p.oe) >= 2 {
-                               body := p.oe[1]
-                               if body.Type == ElementNode && body.Data == "body" {
-                                       p.framesetOK = false
-                                       copyAttributes(body, p.tok)
+               case a.A:
+                       for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
+                               if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
+                                       p.inBodyEndTagFormatting(a.A)
+                                       p.oe.remove(n)
+                                       p.afe.remove(n)
+                                       break
                                }
                        }
-               case "frameset":
-                       if !p.framesetOK || len(p.oe) < 2 || p.oe[1].Data != "body" {
-                               // Ignore the token.
-                               return true
+                       p.reconstructActiveFormattingElements()
+                       p.addFormattingElement()
+               case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+                       p.reconstructActiveFormattingElements()
+                       p.addFormattingElement()
+               case a.Nobr:
+                       p.reconstructActiveFormattingElements()
+                       if p.elementInScope(defaultScope, a.Nobr) {
+                               p.inBodyEndTagFormatting(a.Nobr)
+                               p.reconstructActiveFormattingElements()
                        }
-                       body := p.oe[1]
-                       if body.Parent != nil {
-                               body.Parent.Remove(body)
+                       p.addFormattingElement()
+               case a.Applet, a.Marquee, a.Object:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.afe = append(p.afe, &scopeMarker)
+                       p.framesetOK = false
+               case a.Table:
+                       if !p.quirks {
+                               p.popUntil(buttonScope, a.P)
                        }
-                       p.oe = p.oe[:1]
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.im = inFramesetIM
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inTableIM
                        return true
-               case "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title":
-                       return inHeadIM(p)
-               case "image":
-                       p.tok.Data = "img"
+               case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+                       if p.tok.DataAtom == a.Input {
+                               for _, t := range p.tok.Attr {
+                                       if t.Key == "type" {
+                                               if strings.ToLower(t.Val) == "hidden" {
+                                                       // Skip setting framesetOK = false
+                                                       return true
+                                               }
+                                       }
+                               }
+                       }
+                       p.framesetOK = false
+               case a.Param, a.Source, a.Track:
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+               case a.Hr:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+                       p.framesetOK = false
+               case a.Image:
+                       p.tok.DataAtom = a.Img
+                       p.tok.Data = a.Img.String()
                        return false
-               case "isindex":
+               case a.Isindex:
                        if p.form != nil {
                                // Ignore the token.
                                return true
@@ -825,82 +887,142 @@ func inBodyIM(p *parser) bool {
                        action := ""
                        prompt := "This is a searchable index. Enter search keywords: "
                        attr := []Attribute{{Key: "name", Val: "isindex"}}
-                       for _, a := range p.tok.Attr {
-                               switch a.Key {
+                       for _, t := range p.tok.Attr {
+                               switch t.Key {
                                case "action":
-                                       action = a.Val
+                                       action = t.Val
                                case "name":
                                        // Ignore the attribute.
                                case "prompt":
-                                       prompt = a.Val
+                                       prompt = t.Val
                                default:
-                                       attr = append(attr, a)
+                                       attr = append(attr, t)
                                }
                        }
                        p.acknowledgeSelfClosingTag()
-                       p.popUntil(buttonScope, "p")
-                       p.addElement("form", nil)
-                       p.form = p.top()
+                       p.popUntil(buttonScope, a.P)
+                       p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
                        if action != "" {
                                p.form.Attr = []Attribute{{Key: "action", Val: action}}
                        }
-                       p.addElement("hr", nil)
-                       p.oe.pop()
-                       p.addElement("label", nil)
+                       p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
+                       p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
                        p.addText(prompt)
-                       p.addElement("input", attr)
-                       p.oe.pop()
-                       p.oe.pop()
-                       p.addElement("hr", nil)
-                       p.oe.pop()
+                       p.addChild(&Node{
+                               Type:     ElementNode,
+                               DataAtom: a.Input,
+                               Data:     a.Input.String(),
+                               Attr:     attr,
+                       })
                        p.oe.pop()
-                       p.form = nil
-               case "xmp":
-                       p.popUntil(buttonScope, "p")
+                       p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
+                       p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
+                       p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
+               case a.Textarea:
+                       p.addElement()
+                       p.setOriginalIM()
+                       p.framesetOK = false
+                       p.im = textIM
+               case a.Xmp:
+                       p.popUntil(buttonScope, a.P)
+                       p.reconstructActiveFormattingElements()
+                       p.framesetOK = false
+                       p.addElement()
+                       p.setOriginalIM()
+                       p.im = textIM
+               case a.Iframe:
+                       p.framesetOK = false
+                       p.addElement()
+                       p.setOriginalIM()
+                       p.im = textIM
+               case a.Noembed, a.Noscript:
+                       p.addElement()
+                       p.setOriginalIM()
+                       p.im = textIM
+               case a.Select:
                        p.reconstructActiveFormattingElements()
+                       p.addElement()
                        p.framesetOK = false
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "math", "svg":
+                       p.im = inSelectIM
+                       return true
+               case a.Optgroup, a.Option:
+                       if p.top().DataAtom == a.Option {
+                               p.oe.pop()
+                       }
                        p.reconstructActiveFormattingElements()
-                       if p.tok.Data == "math" {
-                               // TODO: adjust MathML attributes.
+                       p.addElement()
+               case a.Rp, a.Rt:
+                       if p.elementInScope(defaultScope, a.Ruby) {
+                               p.generateImpliedEndTags()
+                       }
+                       p.addElement()
+               case a.Math, a.Svg:
+                       p.reconstructActiveFormattingElements()
+                       if p.tok.DataAtom == a.Math {
+                               adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
                        } else {
-                               // TODO: adjust SVG attributes.
+                               adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
                        }
                        adjustForeignAttributes(p.tok.Attr)
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.top().Namespace = p.tok.Data
+                       if p.hasSelfClosingToken {
+                               p.oe.pop()
+                               p.acknowledgeSelfClosingTag()
+                       }
                        return true
-               case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
+               case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
                        // Ignore the token.
                default:
-                       // TODO.
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "body":
-                       // TODO: autoclose the stack of open elements.
-                       p.im = afterBodyIM
+               switch p.tok.DataAtom {
+               case a.Body:
+                       if p.elementInScope(defaultScope, a.Body) {
+                               p.im = afterBodyIM
+                       }
+               case a.Html:
+                       if p.elementInScope(defaultScope, a.Body) {
+                               p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
+                               return false
+                       }
                        return true
-               case "p":
-                       if !p.elementInScope(buttonScope, "p") {
-                               p.addElement("p", nil)
-                       }
-                       p.popUntil(buttonScope, "p")
-               case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
-                       p.inBodyEndTagFormatting(p.tok.Data)
-               case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
-                       p.popUntil(defaultScope, p.tok.Data)
-               case "applet", "marquee", "object":
-                       if p.popUntil(defaultScope, p.tok.Data) {
+               case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+                       p.popUntil(defaultScope, p.tok.DataAtom)
+               case a.Form:
+                       node := p.form
+                       p.form = nil
+                       i := p.indexOfElementInScope(defaultScope, a.Form)
+                       if node == nil || i == -1 || p.oe[i] != node {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.generateImpliedEndTags()
+                       p.oe.remove(node)
+               case a.P:
+                       if !p.elementInScope(buttonScope, a.P) {
+                               p.parseImpliedToken(StartTagToken, a.P, a.P.String())
+                       }
+                       p.popUntil(buttonScope, a.P)
+               case a.Li:
+                       p.popUntil(listItemScope, a.Li)
+               case a.Dd, a.Dt:
+                       p.popUntil(defaultScope, p.tok.DataAtom)
+               case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+                       p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
+               case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+                       p.inBodyEndTagFormatting(p.tok.DataAtom)
+               case a.Applet, a.Marquee, a.Object:
+                       if p.popUntil(defaultScope, p.tok.DataAtom) {
                                p.clearActiveFormattingElements()
                        }
-               case "br":
+               case a.Br:
                        p.tok.Type = StartTagToken
                        return false
                default:
-                       p.inBodyEndTagOther(p.tok.Data)
+                       p.inBodyEndTagOther(p.tok.DataAtom)
                }
        case CommentToken:
                p.addChild(&Node{
@@ -912,7 +1034,7 @@ func inBodyIM(p *parser) bool {
        return true
 }
 
-func (p *parser) inBodyEndTagFormatting(tag string) {
+func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
        // This is the "adoption agency" algorithm, described at
        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency
 
@@ -928,13 +1050,13 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
                        if p.afe[j].Type == scopeMarkerNode {
                                break
                        }
-                       if p.afe[j].Data == tag {
+                       if p.afe[j].DataAtom == tagAtom {
                                formattingElement = p.afe[j]
                                break
                        }
                }
                if formattingElement == nil {
-                       p.inBodyEndTagOther(tag)
+                       p.inBodyEndTagOther(tagAtom)
                        return
                }
                feIndex := p.oe.index(formattingElement)
@@ -942,7 +1064,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
                        p.afe.remove(formattingElement)
                        return
                }
-               if !p.elementInScope(defaultScope, tag) {
+               if !p.elementInScope(defaultScope, tagAtom) {
                        // Ignore the tag.
                        return
                }
@@ -997,9 +1119,9 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
                        }
                        // Step 9.9.
                        if lastNode.Parent != nil {
-                               lastNode.Parent.Remove(lastNode)
+                               lastNode.Parent.RemoveChild(lastNode)
                        }
-                       node.Add(lastNode)
+                       node.AppendChild(lastNode)
                        // Step 9.10.
                        lastNode = node
                }
@@ -1007,20 +1129,20 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
                // Step 10. Reparent lastNode to the common ancestor,
                // or for misnested table nodes, to the foster parent.
                if lastNode.Parent != nil {
-                       lastNode.Parent.Remove(lastNode)
+                       lastNode.Parent.RemoveChild(lastNode)
                }
-               switch commonAncestor.Data {
-               case "table", "tbody", "tfoot", "thead", "tr":
+               switch commonAncestor.DataAtom {
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
                        p.fosterParent(lastNode)
                default:
-                       commonAncestor.Add(lastNode)
+                       commonAncestor.AppendChild(lastNode)
                }
 
                // Steps 11-13. Reparent nodes from the furthest block's children
                // to a clone of the formatting element.
                clone := formattingElement.clone()
                reparentChildren(clone, furthestBlock)
-               furthestBlock.Add(clone)
+               furthestBlock.AppendChild(clone)
 
                // Step 14. Fix up the list of active formatting elements.
                if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
@@ -1037,9 +1159,9 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
 }
 
 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
-func (p *parser) inBodyEndTagOther(tag string) {
+func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
        for i := len(p.oe) - 1; i >= 0; i-- {
-               if p.oe[i].Data == tag {
+               if p.oe[i].DataAtom == tagAtom {
                        p.oe = p.oe[:i]
                        break
                }
@@ -1055,7 +1177,20 @@ func textIM(p *parser) bool {
        case ErrorToken:
                p.oe.pop()
        case TextToken:
-               p.addText(p.tok.Data)
+               d := p.tok.Data
+               if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
+                       // Ignore a newline at the start of a <textarea> block.
+                       if d != "" && d[0] == '\r' {
+                               d = d[1:]
+                       }
+                       if d != "" && d[0] == '\n' {
+                               d = d[1:]
+                       }
+               }
+               if d == "" {
+                       return true
+               }
+               p.addText(d)
                return true
        case EndTagToken:
                p.oe.pop()
@@ -1072,66 +1207,85 @@ func inTableIM(p *parser) bool {
                // Stop parsing.
                return true
        case TextToken:
-               // TODO.
+               p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
+               switch p.oe.top().DataAtom {
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       if strings.Trim(p.tok.Data, whitespace) == "" {
+                               p.addText(p.tok.Data)
+                               return true
+                       }
+               }
        case StartTagToken:
-               switch p.tok.Data {
-               case "caption":
+               switch p.tok.DataAtom {
+               case a.Caption:
                        p.clearStackToContext(tableScope)
                        p.afe = append(p.afe, &scopeMarker)
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.im = inCaptionIM
                        return true
-               case "tbody", "tfoot", "thead":
+               case a.Colgroup:
                        p.clearStackToContext(tableScope)
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.im = inTableBodyIM
+                       p.addElement()
+                       p.im = inColumnGroupIM
                        return true
-               case "td", "th", "tr":
+               case a.Col:
+                       p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
+                       return false
+               case a.Tbody, a.Tfoot, a.Thead:
                        p.clearStackToContext(tableScope)
-                       p.addElement("tbody", nil)
+                       p.addElement()
                        p.im = inTableBodyIM
+                       return true
+               case a.Td, a.Th, a.Tr:
+                       p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
                        return false
-               case "table":
-                       if p.popUntil(tableScope, "table") {
+               case a.Table:
+                       if p.popUntil(tableScope, a.Table) {
                                p.resetInsertionMode()
                                return false
                        }
                        // Ignore the token.
                        return true
-               case "colgroup":
-                       p.clearStackToContext(tableScope)
-                       p.addElement(p.tok.Data, p.tok.Attr)
-                       p.im = inColumnGroupIM
-                       return true
-               case "col":
-                       p.clearStackToContext(tableScope)
-                       p.addElement("colgroup", p.tok.Attr)
-                       p.im = inColumnGroupIM
-                       return false
-               case "select":
+               case a.Style, a.Script:
+                       return inHeadIM(p)
+               case a.Input:
+                       for _, t := range p.tok.Attr {
+                               if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
+                                       p.addElement()
+                                       p.oe.pop()
+                                       return true
+                               }
+                       }
+                       // Otherwise drop down to the default action.
+               case a.Form:
+                       if p.form != nil {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.addElement()
+                       p.form = p.oe.pop()
+               case a.Select:
                        p.reconstructActiveFormattingElements()
-                       switch p.top().Data {
-                       case "table", "tbody", "tfoot", "thead", "tr":
+                       switch p.top().DataAtom {
+                       case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
                                p.fosterParenting = true
                        }
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.fosterParenting = false
                        p.framesetOK = false
                        p.im = inSelectInTableIM
                        return true
-               default:
-                       // TODO.
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "table":
-                       if p.popUntil(tableScope, "table") {
+               switch p.tok.DataAtom {
+               case a.Table:
+                       if p.popUntil(tableScope, a.Table) {
                                p.resetInsertionMode()
                                return true
                        }
                        // Ignore the token.
                        return true
-               case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
                        // Ignore the token.
                        return true
                }
@@ -1141,13 +1295,13 @@ func inTableIM(p *parser) bool {
                        Data: p.tok.Data,
                })
                return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
        }
 
-       switch p.top().Data {
-       case "table", "tbody", "tfoot", "thead", "tr":
-               p.fosterParenting = true
-               defer func() { p.fosterParenting = false }()
-       }
+       p.fosterParenting = true
+       defer func() { p.fosterParenting = false }()
 
        return inBodyIM(p)
 }
@@ -1156,9 +1310,9 @@ func inTableIM(p *parser) bool {
 func inCaptionIM(p *parser) bool {
        switch p.tok.Type {
        case StartTagToken:
-               switch p.tok.Data {
-               case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr":
-                       if p.popUntil(tableScope, "caption") {
+               switch p.tok.DataAtom {
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
+                       if p.popUntil(tableScope, a.Caption) {
                                p.clearActiveFormattingElements()
                                p.im = inTableIM
                                return false
@@ -1166,23 +1320,23 @@ func inCaptionIM(p *parser) bool {
                                // Ignore the token.
                                return true
                        }
-               case "select":
+               case a.Select:
                        p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.framesetOK = false
                        p.im = inSelectInTableIM
                        return true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "caption":
-                       if p.popUntil(tableScope, "caption") {
+               switch p.tok.DataAtom {
+               case a.Caption:
+                       if p.popUntil(tableScope, a.Caption) {
                                p.clearActiveFormattingElements()
                                p.im = inTableIM
                        }
                        return true
-               case "table":
-                       if p.popUntil(tableScope, "caption") {
+               case a.Table:
+                       if p.popUntil(tableScope, a.Caption) {
                                p.clearActiveFormattingElements()
                                p.im = inTableIM
                                return false
@@ -1190,7 +1344,7 @@ func inCaptionIM(p *parser) bool {
                                // Ignore the token.
                                return true
                        }
-               case "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr":
+               case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
                        // Ignore the token.
                        return true
                }
@@ -1201,6 +1355,16 @@ func inCaptionIM(p *parser) bool {
 // Section 12.2.5.4.12.
 func inColumnGroupIM(p *parser) bool {
        switch p.tok.Type {
+       case TextToken:
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) < len(p.tok.Data) {
+                       // Add the initial whitespace to the current node.
+                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+                       if s == "" {
+                               return true
+                       }
+                       p.tok.Data = s
+               }
        case CommentToken:
                p.addChild(&Node{
                        Type: CommentNode,
@@ -1211,29 +1375,29 @@ func inColumnGroupIM(p *parser) bool {
                // Ignore the token.
                return true
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        return inBodyIM(p)
-               case "col":
-                       p.addElement(p.tok.Data, p.tok.Attr)
+               case a.Col:
+                       p.addElement()
                        p.oe.pop()
                        p.acknowledgeSelfClosingTag()
                        return true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "colgroup":
-                       if p.oe.top().Data != "html" {
+               switch p.tok.DataAtom {
+               case a.Colgroup:
+                       if p.oe.top().DataAtom != a.Html {
                                p.oe.pop()
                                p.im = inTableIM
                        }
                        return true
-               case "col":
+               case a.Col:
                        // Ignore the token.
                        return true
                }
        }
-       if p.oe.top().Data != "html" {
+       if p.oe.top().DataAtom != a.Html {
                p.oe.pop()
                p.im = inTableIM
                return false
@@ -1243,48 +1407,42 @@ func inColumnGroupIM(p *parser) bool {
 
 // Section 12.2.5.4.13.
 func inTableBodyIM(p *parser) bool {
-       var (
-               add      bool
-               data     string
-               attr     []Attribute
-               consumed bool
-       )
        switch p.tok.Type {
-       case ErrorToken:
-               // TODO.
-       case TextToken:
-               // TODO.
        case StartTagToken:
-               switch p.tok.Data {
-               case "tr":
-                       add = true
-                       data = p.tok.Data
-                       attr = p.tok.Attr
-                       consumed = true
-               case "td", "th":
-                       add = true
-                       data = "tr"
-                       consumed = false
-               case "caption", "col", "colgroup", "tbody", "tfoot", "thead":
-                       if !p.popUntil(tableScope, "tbody", "thead", "tfoot") {
-                               // Ignore the token.
-                               return true
-                       }
-                       p.im = inTableIM
+               switch p.tok.DataAtom {
+               case a.Tr:
+                       p.clearStackToContext(tableBodyScope)
+                       p.addElement()
+                       p.im = inRowIM
+                       return true
+               case a.Td, a.Th:
+                       p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
                        return false
-               default:
-                       // TODO.
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
+                       if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
+                               p.im = inTableIM
+                               return false
+                       }
+                       // Ignore the token.
+                       return true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "table":
-                       if p.popUntil(tableScope, "tbody", "thead", "tfoot") {
+               switch p.tok.DataAtom {
+               case a.Tbody, a.Tfoot, a.Thead:
+                       if p.elementInScope(tableScope, p.tok.DataAtom) {
+                               p.clearStackToContext(tableBodyScope)
+                               p.oe.pop()
+                               p.im = inTableIM
+                       }
+                       return true
+               case a.Table:
+                       if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
                                p.im = inTableIM
                                return false
                        }
                        // Ignore the token.
                        return true
-               case "body", "caption", "col", "colgroup", "html", "td", "th", "tr":
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
                        // Ignore the token.
                        return true
                }
@@ -1295,117 +1453,102 @@ func inTableBodyIM(p *parser) bool {
                })
                return true
        }
-       if add {
-               // TODO: clear the stack back to a table body context.
-               p.addElement(data, attr)
-               p.im = inRowIM
-               return consumed
-       }
+
        return inTableIM(p)
 }
 
 // Section 12.2.5.4.14.
 func inRowIM(p *parser) bool {
        switch p.tok.Type {
-       case ErrorToken:
-               // TODO.
-       case TextToken:
-               // TODO.
        case StartTagToken:
-               switch p.tok.Data {
-               case "td", "th":
+               switch p.tok.DataAtom {
+               case a.Td, a.Th:
                        p.clearStackToContext(tableRowScope)
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.afe = append(p.afe, &scopeMarker)
                        p.im = inCellIM
                        return true
-               case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr":
-                       if p.popUntil(tableScope, "tr") {
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       if p.popUntil(tableScope, a.Tr) {
                                p.im = inTableBodyIM
                                return false
                        }
                        // Ignore the token.
                        return true
-               default:
-                       // TODO.
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "tr":
-                       if p.popUntil(tableScope, "tr") {
+               switch p.tok.DataAtom {
+               case a.Tr:
+                       if p.popUntil(tableScope, a.Tr) {
                                p.im = inTableBodyIM
                                return true
                        }
                        // Ignore the token.
                        return true
-               case "table":
-                       if p.popUntil(tableScope, "tr") {
+               case a.Table:
+                       if p.popUntil(tableScope, a.Tr) {
                                p.im = inTableBodyIM
                                return false
                        }
                        // Ignore the token.
                        return true
-               case "tbody", "tfoot", "thead":
-                       // TODO.
-               case "body", "caption", "col", "colgroup", "html", "td", "th":
+               case a.Tbody, a.Tfoot, a.Thead:
+                       if p.elementInScope(tableScope, p.tok.DataAtom) {
+                               p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
+                               return false
+                       }
+                       // Ignore the token.
+                       return true
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
                        // Ignore the token.
                        return true
-               default:
-                       // TODO.
                }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
        }
+
        return inTableIM(p)
 }
 
 // Section 12.2.5.4.15.
 func inCellIM(p *parser) bool {
-       var (
-               closeTheCellAndReprocess bool
-       )
        switch p.tok.Type {
        case StartTagToken:
-               switch p.tok.Data {
-               case "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr":
-                       // TODO: check for "td" or "th" in table scope.
-                       closeTheCellAndReprocess = true
-               case "select":
+               switch p.tok.DataAtom {
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+                       if p.popUntil(tableScope, a.Td, a.Th) {
+                               // Close the cell and reprocess.
+                               p.clearActiveFormattingElements()
+                               p.im = inRowIM
+                               return false
+                       }
+                       // Ignore the token.
+                       return true
+               case a.Select:
                        p.reconstructActiveFormattingElements()
-                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.addElement()
                        p.framesetOK = false
                        p.im = inSelectInTableIM
                        return true
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "td", "th":
-                       if !p.popUntil(tableScope, p.tok.Data) {
+               switch p.tok.DataAtom {
+               case a.Td, a.Th:
+                       if !p.popUntil(tableScope, p.tok.DataAtom) {
                                // Ignore the token.
                                return true
                        }
                        p.clearActiveFormattingElements()
                        p.im = inRowIM
                        return true
-               case "body", "caption", "col", "colgroup", "html":
-                       // TODO.
-               case "table", "tbody", "tfoot", "thead", "tr":
-                       // TODO: check for matching element in table scope.
-                       closeTheCellAndReprocess = true
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       }
-       if closeTheCellAndReprocess {
-               if p.popUntil(tableScope, "td") || p.popUntil(tableScope, "th") {
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
+                       // Ignore the token.
+                       return true
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       if !p.elementInScope(tableScope, p.tok.DataAtom) {
+                               // Ignore the token.
+                               return true
+                       }
+                       // Close the cell and reprocess.
+                       p.popUntil(tableScope, a.Td, a.Th)
                        p.clearActiveFormattingElements()
                        p.im = inRowIM
                        return false
@@ -1416,66 +1559,73 @@ func inCellIM(p *parser) bool {
 
 // Section 12.2.5.4.16.
 func inSelectIM(p *parser) bool {
-       endSelect := false
        switch p.tok.Type {
        case ErrorToken:
-               // TODO.
+               // Stop parsing.
+               return true
        case TextToken:
-               p.addText(p.tok.Data)
+               p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
-                       // TODO.
-               case "option":
-                       if p.top().Data == "option" {
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Option:
+                       if p.top().DataAtom == a.Option {
                                p.oe.pop()
                        }
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "optgroup":
-                       if p.top().Data == "option" {
+                       p.addElement()
+               case a.Optgroup:
+                       if p.top().DataAtom == a.Option {
                                p.oe.pop()
                        }
-                       if p.top().Data == "optgroup" {
+                       if p.top().DataAtom == a.Optgroup {
                                p.oe.pop()
                        }
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "select":
-                       endSelect = true
-               case "input", "keygen", "textarea":
-                       // TODO.
-               case "script":
-                       // TODO.
-               default:
+                       p.addElement()
+               case a.Select:
+                       p.tok.Type = EndTagToken
+                       return false
+               case a.Input, a.Keygen, a.Textarea:
+                       if p.elementInScope(selectScope, a.Select) {
+                               p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
+                               return false
+                       }
+                       // In order to properly ignore <textarea>, we need to change the tokenizer mode.
+                       p.tokenizer.NextIsNotRawText()
                        // Ignore the token.
+                       return true
+               case a.Script:
+                       return inHeadIM(p)
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "option":
-                       if p.top().Data == "option" {
+               switch p.tok.DataAtom {
+               case a.Option:
+                       if p.top().DataAtom == a.Option {
                                p.oe.pop()
                        }
-               case "optgroup":
+               case a.Optgroup:
                        i := len(p.oe) - 1
-                       if p.oe[i].Data == "option" {
+                       if p.oe[i].DataAtom == a.Option {
                                i--
                        }
-                       if p.oe[i].Data == "optgroup" {
+                       if p.oe[i].DataAtom == a.Optgroup {
                                p.oe = p.oe[:i]
                        }
-               case "select":
-                       endSelect = true
-               default:
-                       // Ignore the token.
+               case a.Select:
+                       if p.popUntil(selectScope, a.Select) {
+                               p.resetInsertionMode()
+                       }
                }
        case CommentToken:
-               p.doc.Add(&Node{
+               p.doc.AppendChild(&Node{
                        Type: CommentNode,
                        Data: p.tok.Data,
                })
+       case DoctypeToken:
+               // Ignore the token.
+               return true
        }
-       if endSelect {
-               p.endSelect()
-       }
+
        return true
 }
 
@@ -1483,10 +1633,10 @@ func inSelectIM(p *parser) bool {
 func inSelectInTableIM(p *parser) bool {
        switch p.tok.Type {
        case StartTagToken, EndTagToken:
-               switch p.tok.Data {
-               case "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th":
-                       if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.Data) {
-                               p.endSelect()
+               switch p.tok.DataAtom {
+               case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
+                       if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
+                               p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
                                return false
                        } else {
                                // Ignore the token.
@@ -1497,40 +1647,35 @@ func inSelectInTableIM(p *parser) bool {
        return inSelectIM(p)
 }
 
-func (p *parser) endSelect() {
-       for i := len(p.oe) - 1; i >= 0; i-- {
-               switch p.oe[i].Data {
-               case "option", "optgroup":
-                       continue
-               case "select":
-                       p.oe = p.oe[:i]
-                       p.resetInsertionMode()
-               }
-               return
-       }
-}
-
 // Section 12.2.5.4.18.
 func afterBodyIM(p *parser) bool {
        switch p.tok.Type {
        case ErrorToken:
                // Stop parsing.
                return true
+       case TextToken:
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) == 0 {
+                       // It was all whitespace.
+                       return inBodyIM(p)
+               }
        case StartTagToken:
-               if p.tok.Data == "html" {
+               if p.tok.DataAtom == a.Html {
                        return inBodyIM(p)
                }
        case EndTagToken:
-               if p.tok.Data == "html" {
-                       p.im = afterAfterBodyIM
+               if p.tok.DataAtom == a.Html {
+                       if !p.fragment {
+                               p.im = afterAfterBodyIM
+                       }
                        return true
                }
        case CommentToken:
                // The comment is attached to the <html> element.
-               if len(p.oe) < 1 || p.oe[0].Data != "html" {
+               if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
                        panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
                }
-               p.oe[0].Add(&Node{
+               p.oe[0].AppendChild(&Node{
                        Type: CommentNode,
                        Data: p.tok.Data,
                })
@@ -1561,24 +1706,24 @@ func inFramesetIM(p *parser) bool {
                        p.addText(s)
                }
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        return inBodyIM(p)
-               case "frameset":
-                       p.addElement(p.tok.Data, p.tok.Attr)
-               case "frame":
-                       p.addElement(p.tok.Data, p.tok.Attr)
+               case a.Frameset:
+                       p.addElement()
+               case a.Frame:
+                       p.addElement()
                        p.oe.pop()
                        p.acknowledgeSelfClosingTag()
-               case "noframes":
+               case a.Noframes:
                        return inHeadIM(p)
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "frameset":
-                       if p.oe.top().Data != "html" {
+               switch p.tok.DataAtom {
+               case a.Frameset:
+                       if p.oe.top().DataAtom != a.Html {
                                p.oe.pop()
-                               if p.oe.top().Data != "frameset" {
+                               if p.oe.top().DataAtom != a.Frameset {
                                        p.im = afterFramesetIM
                                        return true
                                }
@@ -1611,15 +1756,15 @@ func afterFramesetIM(p *parser) bool {
                        p.addText(s)
                }
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        return inBodyIM(p)
-               case "noframes":
+               case a.Noframes:
                        return inHeadIM(p)
                }
        case EndTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        p.im = afterAfterFramesetIM
                        return true
                }
@@ -1636,17 +1781,23 @@ func afterAfterBodyIM(p *parser) bool {
                // Stop parsing.
                return true
        case TextToken:
-               // TODO.
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) == 0 {
+                       // It was all whitespace.
+                       return inBodyIM(p)
+               }
        case StartTagToken:
-               if p.tok.Data == "html" {
+               if p.tok.DataAtom == a.Html {
                        return inBodyIM(p)
                }
        case CommentToken:
-               p.doc.Add(&Node{
+               p.doc.AppendChild(&Node{
                        Type: CommentNode,
                        Data: p.tok.Data,
                })
                return true
+       case DoctypeToken:
+               return inBodyIM(p)
        }
        p.im = inBodyIM
        return false
@@ -1656,7 +1807,7 @@ func afterAfterBodyIM(p *parser) bool {
 func afterAfterFramesetIM(p *parser) bool {
        switch p.tok.Type {
        case CommentToken:
-               p.addChild(&Node{
+               p.doc.AppendChild(&Node{
                        Type: CommentNode,
                        Data: p.tok.Data,
                })
@@ -1670,35 +1821,34 @@ func afterAfterFramesetIM(p *parser) bool {
                        return -1
                }, p.tok.Data)
                if s != "" {
-                       p.reconstructActiveFormattingElements()
-                       p.addText(s)
+                       p.tok.Data = s
+                       return inBodyIM(p)
                }
        case StartTagToken:
-               switch p.tok.Data {
-               case "html":
+               switch p.tok.DataAtom {
+               case a.Html:
                        return inBodyIM(p)
-               case "noframes":
+               case a.Noframes:
                        return inHeadIM(p)
                }
+       case DoctypeToken:
+               return inBodyIM(p)
        default:
                // Ignore the token.
        }
        return true
 }
 
+const whitespaceOrNUL = whitespace + "\x00"
+
 // Section 12.2.5.5.
 func parseForeignContent(p *parser) bool {
        switch p.tok.Type {
        case TextToken:
-               // TODO: HTML integration points.
-               if p.top().Namespace == "" {
-                       inBodyIM(p)
-                       p.resetInsertionMode()
-                       return true
-               }
                if p.framesetOK {
-                       p.framesetOK = strings.TrimLeft(p.tok.Data, whitespace) == ""
+                       p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
                }
+               p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
                p.addText(p.tok.Data)
        case CommentToken:
                p.addChild(&Node{
@@ -1706,15 +1856,21 @@ func parseForeignContent(p *parser) bool {
                        Data: p.tok.Data,
                })
        case StartTagToken:
-               if htmlIntegrationPoint(p.top()) {
-                       inBodyIM(p)
-                       p.resetInsertionMode()
-                       return true
+               b := breakout[p.tok.Data]
+               if p.tok.DataAtom == a.Font {
+               loop:
+                       for _, attr := range p.tok.Attr {
+                               switch attr.Key {
+                               case "color", "face", "size":
+                                       b = true
+                                       break loop
+                               }
+                       }
                }
-               if breakout[p.tok.Data] {
+               if b {
                        for i := len(p.oe) - 1; i >= 0; i-- {
-                               // TODO: MathML integration points.
-                               if p.oe[i].Namespace == "" || htmlIntegrationPoint(p.oe[i]) {
+                               n := p.oe[i]
+                               if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
                                        p.oe = p.oe[:i+1]
                                        break
                                }
@@ -1723,21 +1879,31 @@ func parseForeignContent(p *parser) bool {
                }
                switch p.top().Namespace {
                case "math":
-                       // TODO: adjust MathML attributes.
+                       adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
                case "svg":
                        // Adjust SVG tag names. The tokenizer lower-cases tag names, but
                        // SVG wants e.g. "foreignObject" with a capital second "O".
                        if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
+                               p.tok.DataAtom = a.Lookup([]byte(x))
                                p.tok.Data = x
                        }
-                       // TODO: adjust SVG attributes.
+                       adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
                default:
                        panic("html: bad parser state: unexpected namespace")
                }
                adjustForeignAttributes(p.tok.Attr)
                namespace := p.top().Namespace
-               p.addElement(p.tok.Data, p.tok.Attr)
+               p.addElement()
                p.top().Namespace = namespace
+               if namespace != "" {
+                       // Don't let the tokenizer go into raw text mode in foreign content
+                       // (e.g. in an SVG <title> tag).
+                       p.tokenizer.NextIsNotRawText()
+               }
+               if p.hasSelfClosingToken {
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+               }
        case EndTagToken:
                for i := len(p.oe) - 1; i >= 0; i-- {
                        if p.oe[i].Namespace == "" {
@@ -1764,34 +1930,80 @@ func (p *parser) inForeignContent() bool {
        if n.Namespace == "" {
                return false
        }
-       // TODO: MathML, HTML integration points.
-       // TODO: MathML's annotation-xml combining with SVG's svg.
+       if mathMLTextIntegrationPoint(n) {
+               if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
+                       return false
+               }
+               if p.tok.Type == TextToken {
+                       return false
+               }
+       }
+       if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
+               return false
+       }
+       if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
+               return false
+       }
+       if p.tok.Type == ErrorToken {
+               return false
+       }
        return true
 }
 
-func (p *parser) parse() error {
-       // Iterate until EOF. Any other error will cause an early return.
-       consumed := true
-       for {
-               if consumed {
-                       if err := p.read(); err != nil {
-                               if err == io.EOF {
-                                       break
-                               }
-                               return err
-                       }
-               }
+// parseImpliedToken parses a token as though it had appeared in the parser's
+// input.
+func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
+       realToken, selfClosing := p.tok, p.hasSelfClosingToken
+       p.tok = Token{
+               Type:     t,
+               DataAtom: dataAtom,
+               Data:     data,
+       }
+       p.hasSelfClosingToken = false
+       p.parseCurrentToken()
+       p.tok, p.hasSelfClosingToken = realToken, selfClosing
+}
+
+// parseCurrentToken runs the current token through the parsing routines
+// until it is consumed.
+func (p *parser) parseCurrentToken() {
+       if p.tok.Type == SelfClosingTagToken {
+               p.hasSelfClosingToken = true
+               p.tok.Type = StartTagToken
+       }
+
+       consumed := false
+       for !consumed {
                if p.inForeignContent() {
                        consumed = parseForeignContent(p)
                } else {
                        consumed = p.im(p)
                }
        }
-       // Loop until the final token (the ErrorToken signifying EOF) is consumed.
-       for {
-               if consumed = p.im(p); consumed {
-                       break
+
+       if p.hasSelfClosingToken {
+               // This is a parse error, but ignore it.
+               p.hasSelfClosingToken = false
+       }
+}
+
+func (p *parser) parse() error {
+       // Iterate until EOF. Any other error will cause an early return.
+       var err error
+       for err != io.EOF {
+               // CDATA sections are allowed only in foreign content.
+               n := p.oe.top()
+               p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
+               // Read and parse the next token.
+               p.tokenizer.Next()
+               p.tok = p.tokenizer.Token()
+               if p.tok.Type == ErrorToken {
+                       err = p.tokenizer.Err()
+                       if err != nil && err != io.EOF {
+                               return err
+                       }
                }
+               p.parseCurrentToken()
        }
        return nil
 }
@@ -1815,36 +2027,44 @@ func Parse(r io.Reader) (*Node, error) {
        return p.doc, nil
 }
 
-// ParseFragment parses a fragment of HTML and returns the nodes that were 
+// ParseFragment parses a fragment of HTML and returns the nodes that were
 // found. If the fragment is the InnerHTML for an existing element, pass that
 // element in context.
 func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+       contextTag := ""
+       if context != nil {
+               if context.Type != ElementNode {
+                       return nil, errors.New("html: ParseFragment of non-element Node")
+               }
+               // The next check isn't just context.DataAtom.String() == context.Data because
+               // it is valid to pass an element whose tag isn't a known atom. For example,
+               // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
+               if context.DataAtom != a.Lookup([]byte(context.Data)) {
+                       return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
+               }
+               contextTag = context.DataAtom.String()
+       }
        p := &parser{
-               tokenizer: NewTokenizer(r),
+               tokenizer: NewTokenizerFragment(r, contextTag),
                doc: &Node{
                        Type: DocumentNode,
                },
                scripting: true,
+               fragment:  true,
                context:   context,
        }
 
-       if context != nil {
-               switch context.Data {
-               case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
-                       p.tokenizer.rawTag = context.Data
-               }
-       }
-
        root := &Node{
-               Type: ElementNode,
-               Data: "html",
+               Type:     ElementNode,
+               DataAtom: a.Html,
+               Data:     a.Html.String(),
        }
-       p.doc.Add(root)
+       p.doc.AppendChild(root)
        p.oe = nodeStack{root}
        p.resetInsertionMode()
 
        for n := context; n != nil; n = n.Parent {
-               if n.Type == ElementNode && n.Data == "form" {
+               if n.Type == ElementNode && n.DataAtom == a.Form {
                        p.form = n
                        break
                }
@@ -1860,10 +2080,12 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
                parent = root
        }
 
-       result := parent.Child
-       parent.Child = nil
-       for _, n := range result {
-               n.Parent = nil
+       var result []*Node
+       for c := parent.FirstChild; c != nil; {
+               next := c.NextSibling
+               parent.RemoveChild(c)
+               result = append(result, c)
+               c = next
        }
        return result, nil
 }