update packaging
[platform/core/system/edge-orchestration.git] / vendor / github.com / miekg / dns / vendor / golang.org / x / net / html / parse.go
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package html
6
7 import (
8         "errors"
9         "fmt"
10         "io"
11         "strings"
12
13         a "golang.org/x/net/html/atom"
14 )
15
16 // A parser implements the HTML5 parsing algorithm:
17 // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
18 type parser struct {
19         // tokenizer provides the tokens for the parser.
20         tokenizer *Tokenizer
21         // tok is the most recently read token.
22         tok Token
23         // Self-closing tags like <hr/> are treated as start tags, except that
24         // hasSelfClosingToken is set while they are being processed.
25         hasSelfClosingToken bool
26         // doc is the document root element.
27         doc *Node
28         // The stack of open elements (section 12.2.4.2) and active formatting
29         // elements (section 12.2.4.3).
30         oe, afe nodeStack
31         // Element pointers (section 12.2.4.4).
32         head, form *Node
33         // Other parsing state flags (section 12.2.4.5).
34         scripting, framesetOK bool
35         // The stack of template insertion modes
36         templateStack insertionModeStack
37         // im is the current insertion mode.
38         im insertionMode
39         // originalIM is the insertion mode to go back to after completing a text
40         // or inTableText insertion mode.
41         originalIM insertionMode
42         // fosterParenting is whether new elements should be inserted according to
43         // the foster parenting rules (section 12.2.6.1).
44         fosterParenting bool
45         // quirks is whether the parser is operating in "quirks mode."
46         quirks bool
47         // fragment is whether the parser is parsing an HTML fragment.
48         fragment bool
49         // context is the context element when parsing an HTML fragment
50         // (section 12.4).
51         context *Node
52 }
53
54 func (p *parser) top() *Node {
55         if n := p.oe.top(); n != nil {
56                 return n
57         }
58         return p.doc
59 }
60
61 // Stop tags for use in popUntil. These come from section 12.2.4.2.
62 var (
63         defaultScopeStopTags = map[string][]a.Atom{
64                 "":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
65                 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
66                 "svg":  {a.Desc, a.ForeignObject, a.Title},
67         }
68 )
69
// scope selects which set of boundary ("stop") tags applies when the stack
// of open elements is searched (indexOfElementInScope) or trimmed
// (clearStackToContext).
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope stops at ol and ul in addition to the default stop tags.
	listItemScope
	// buttonScope stops at button in addition to the default stop tags.
	buttonScope
	// tableScope stops at html, table and template.
	tableScope
	// tableRowScope stops at html, tr and template. It is handled by
	// clearStackToContext but not by indexOfElementInScope.
	tableRowScope
	// tableBodyScope stops at html, tbody, tfoot, thead and template. It is
	// handled by clearStackToContext but not by indexOfElementInScope.
	tableBodyScope
	// selectScope stops at every tag other than optgroup and option.
	selectScope
)
81
82 // popUntil pops the stack of open elements at the highest element whose tag
83 // is in matchTags, provided there is no higher element in the scope's stop
84 // tags (as defined in section 12.2.4.2). It returns whether or not there was
85 // such an element. If there was not, popUntil leaves the stack unchanged.
86 //
87 // For example, the set of stop tags for table scope is: "html", "table". If
88 // the stack was:
89 // ["html", "body", "font", "table", "b", "i", "u"]
90 // then popUntil(tableScope, "font") would return false, but
91 // popUntil(tableScope, "i") would return true and the stack would become:
92 // ["html", "body", "font", "table", "b"]
93 //
94 // If an element's tag is in both the stop tags and matchTags, then the stack
95 // will be popped and the function returns true (provided, of course, there was
96 // no higher element in the stack that was also in the stop tags). For example,
97 // popUntil(tableScope, "table") returns true and leaves:
98 // ["html", "body", "font"]
99 func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100         if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101                 p.oe = p.oe[:i]
102                 return true
103         }
104         return false
105 }
106
107 // indexOfElementInScope returns the index in p.oe of the highest element whose
108 // tag is in matchTags that is in scope. If no matching element is in scope, it
109 // returns -1.
110 func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111         for i := len(p.oe) - 1; i >= 0; i-- {
112                 tagAtom := p.oe[i].DataAtom
113                 if p.oe[i].Namespace == "" {
114                         for _, t := range matchTags {
115                                 if t == tagAtom {
116                                         return i
117                                 }
118                         }
119                         switch s {
120                         case defaultScope:
121                                 // No-op.
122                         case listItemScope:
123                                 if tagAtom == a.Ol || tagAtom == a.Ul {
124                                         return -1
125                                 }
126                         case buttonScope:
127                                 if tagAtom == a.Button {
128                                         return -1
129                                 }
130                         case tableScope:
131                                 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132                                         return -1
133                                 }
134                         case selectScope:
135                                 if tagAtom != a.Optgroup && tagAtom != a.Option {
136                                         return -1
137                                 }
138                         default:
139                                 panic("unreachable")
140                         }
141                 }
142                 switch s {
143                 case defaultScope, listItemScope, buttonScope:
144                         for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145                                 if t == tagAtom {
146                                         return -1
147                                 }
148                         }
149                 }
150         }
151         return -1
152 }
153
154 // elementInScope is like popUntil, except that it doesn't modify the stack of
155 // open elements.
156 func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157         return p.indexOfElementInScope(s, matchTags...) != -1
158 }
159
160 // clearStackToContext pops elements off the stack of open elements until a
161 // scope-defined element is found.
162 func (p *parser) clearStackToContext(s scope) {
163         for i := len(p.oe) - 1; i >= 0; i-- {
164                 tagAtom := p.oe[i].DataAtom
165                 switch s {
166                 case tableScope:
167                         if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168                                 p.oe = p.oe[:i+1]
169                                 return
170                         }
171                 case tableRowScope:
172                         if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173                                 p.oe = p.oe[:i+1]
174                                 return
175                         }
176                 case tableBodyScope:
177                         if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178                                 p.oe = p.oe[:i+1]
179                                 return
180                         }
181                 default:
182                         panic("unreachable")
183                 }
184         }
185 }
186
187 // generateImpliedEndTags pops nodes off the stack of open elements as long as
188 // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
189 // If exceptions are specified, nodes with that name will not be popped off.
190 func (p *parser) generateImpliedEndTags(exceptions ...string) {
191         var i int
192 loop:
193         for i = len(p.oe) - 1; i >= 0; i-- {
194                 n := p.oe[i]
195                 if n.Type == ElementNode {
196                         switch n.DataAtom {
197                         case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
198                                 for _, except := range exceptions {
199                                         if n.Data == except {
200                                                 break loop
201                                         }
202                                 }
203                                 continue
204                         }
205                 }
206                 break
207         }
208
209         p.oe = p.oe[:i+1]
210 }
211
212 // addChild adds a child node n to the top element, and pushes n onto the stack
213 // of open elements if it is an element node.
214 func (p *parser) addChild(n *Node) {
215         if p.shouldFosterParent() {
216                 p.fosterParent(n)
217         } else {
218                 p.top().AppendChild(n)
219         }
220
221         if n.Type == ElementNode {
222                 p.oe = append(p.oe, n)
223         }
224 }
225
226 // shouldFosterParent returns whether the next node to be added should be
227 // foster parented.
228 func (p *parser) shouldFosterParent() bool {
229         if p.fosterParenting {
230                 switch p.top().DataAtom {
231                 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
232                         return true
233                 }
234         }
235         return false
236 }
237
238 // fosterParent adds a child node according to the foster parenting rules.
239 // Section 12.2.6.1, "foster parenting".
240 func (p *parser) fosterParent(n *Node) {
241         var table, parent, prev, template *Node
242         var i int
243         for i = len(p.oe) - 1; i >= 0; i-- {
244                 if p.oe[i].DataAtom == a.Table {
245                         table = p.oe[i]
246                         break
247                 }
248         }
249
250         var j int
251         for j = len(p.oe) - 1; j >= 0; j-- {
252                 if p.oe[j].DataAtom == a.Template {
253                         template = p.oe[j]
254                         break
255                 }
256         }
257
258         if template != nil && (table == nil || j > i) {
259                 template.AppendChild(n)
260                 return
261         }
262
263         if table == nil {
264                 // The foster parent is the html element.
265                 parent = p.oe[0]
266         } else {
267                 parent = table.Parent
268         }
269         if parent == nil {
270                 parent = p.oe[i-1]
271         }
272
273         if table != nil {
274                 prev = table.PrevSibling
275         } else {
276                 prev = parent.LastChild
277         }
278         if prev != nil && prev.Type == TextNode && n.Type == TextNode {
279                 prev.Data += n.Data
280                 return
281         }
282
283         parent.InsertBefore(n, table)
284 }
285
286 // addText adds text to the preceding node if it is a text node, or else it
287 // calls addChild with a new text node.
288 func (p *parser) addText(text string) {
289         if text == "" {
290                 return
291         }
292
293         if p.shouldFosterParent() {
294                 p.fosterParent(&Node{
295                         Type: TextNode,
296                         Data: text,
297                 })
298                 return
299         }
300
301         t := p.top()
302         if n := t.LastChild; n != nil && n.Type == TextNode {
303                 n.Data += text
304                 return
305         }
306         p.addChild(&Node{
307                 Type: TextNode,
308                 Data: text,
309         })
310 }
311
312 // addElement adds a child element based on the current token.
313 func (p *parser) addElement() {
314         p.addChild(&Node{
315                 Type:     ElementNode,
316                 DataAtom: p.tok.DataAtom,
317                 Data:     p.tok.Data,
318                 Attr:     p.tok.Attr,
319         })
320 }
321
322 // Section 12.2.4.3.
323 func (p *parser) addFormattingElement() {
324         tagAtom, attr := p.tok.DataAtom, p.tok.Attr
325         p.addElement()
326
327         // Implement the Noah's Ark clause, but with three per family instead of two.
328         identicalElements := 0
329 findIdenticalElements:
330         for i := len(p.afe) - 1; i >= 0; i-- {
331                 n := p.afe[i]
332                 if n.Type == scopeMarkerNode {
333                         break
334                 }
335                 if n.Type != ElementNode {
336                         continue
337                 }
338                 if n.Namespace != "" {
339                         continue
340                 }
341                 if n.DataAtom != tagAtom {
342                         continue
343                 }
344                 if len(n.Attr) != len(attr) {
345                         continue
346                 }
347         compareAttributes:
348                 for _, t0 := range n.Attr {
349                         for _, t1 := range attr {
350                                 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
351                                         // Found a match for this attribute, continue with the next attribute.
352                                         continue compareAttributes
353                                 }
354                         }
355                         // If we get here, there is no attribute that matches a.
356                         // Therefore the element is not identical to the new one.
357                         continue findIdenticalElements
358                 }
359
360                 identicalElements++
361                 if identicalElements >= 3 {
362                         p.afe.remove(n)
363                 }
364         }
365
366         p.afe = append(p.afe, p.top())
367 }
368
369 // Section 12.2.4.3.
370 func (p *parser) clearActiveFormattingElements() {
371         for {
372                 n := p.afe.pop()
373                 if len(p.afe) == 0 || n.Type == scopeMarkerNode {
374                         return
375                 }
376         }
377 }
378
379 // Section 12.2.4.3.
380 func (p *parser) reconstructActiveFormattingElements() {
381         n := p.afe.top()
382         if n == nil {
383                 return
384         }
385         if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
386                 return
387         }
388         i := len(p.afe) - 1
389         for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
390                 if i == 0 {
391                         i = -1
392                         break
393                 }
394                 i--
395                 n = p.afe[i]
396         }
397         for {
398                 i++
399                 clone := p.afe[i].clone()
400                 p.addChild(clone)
401                 p.afe[i] = clone
402                 if i == len(p.afe)-1 {
403                         break
404                 }
405         }
406 }
407
// acknowledgeSelfClosingTag clears hasSelfClosingToken, marking the
// self-closing flag of the token being processed as handled.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
412
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
//
// It returns whether the token was consumed. The insertion modes in this
// file return false after switching p.im or queueing an implied token, so
// that the same token gets processed again.
type insertionMode func(*parser) bool
418
419 // setOriginalIM sets the insertion mode to return to after completing a text or
420 // inTableText insertion mode.
421 // Section 12.2.4.1, "using the rules for".
422 func (p *parser) setOriginalIM() {
423         if p.originalIM != nil {
424                 panic("html: bad parser state: originalIM was set twice")
425         }
426         p.originalIM = p.im
427 }
428
429 // Section 12.2.4.1, "reset the insertion mode".
430 func (p *parser) resetInsertionMode() {
431         for i := len(p.oe) - 1; i >= 0; i-- {
432                 n := p.oe[i]
433                 last := i == 0
434                 if last && p.context != nil {
435                         n = p.context
436                 }
437
438                 switch n.DataAtom {
439                 case a.Select:
440                         if !last {
441                                 for ancestor, first := n, p.oe[0]; ancestor != first; {
442                                         if ancestor == first {
443                                                 break
444                                         }
445                                         ancestor = p.oe[p.oe.index(ancestor)-1]
446                                         switch ancestor.DataAtom {
447                                         case a.Template:
448                                                 p.im = inSelectIM
449                                                 return
450                                         case a.Table:
451                                                 p.im = inSelectInTableIM
452                                                 return
453                                         }
454                                 }
455                         }
456                         p.im = inSelectIM
457                 case a.Td, a.Th:
458                         // TODO: remove this divergence from the HTML5 spec.
459                         //
460                         // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
461                         p.im = inCellIM
462                 case a.Tr:
463                         p.im = inRowIM
464                 case a.Tbody, a.Thead, a.Tfoot:
465                         p.im = inTableBodyIM
466                 case a.Caption:
467                         p.im = inCaptionIM
468                 case a.Colgroup:
469                         p.im = inColumnGroupIM
470                 case a.Table:
471                         p.im = inTableIM
472                 case a.Template:
473                         // TODO: remove this divergence from the HTML5 spec.
474                         if n.Namespace != "" {
475                                 continue
476                         }
477                         p.im = p.templateStack.top()
478                 case a.Head:
479                         // TODO: remove this divergence from the HTML5 spec.
480                         //
481                         // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
482                         p.im = inHeadIM
483                 case a.Body:
484                         p.im = inBodyIM
485                 case a.Frameset:
486                         p.im = inFramesetIM
487                 case a.Html:
488                         if p.head == nil {
489                                 p.im = beforeHeadIM
490                         } else {
491                                 p.im = afterHeadIM
492                         }
493                 default:
494                         if last {
495                                 p.im = inBodyIM
496                                 return
497                         }
498                         continue
499                 }
500                 return
501         }
502 }
503
// whitespace is the cutset (space, tab, carriage return, line feed, form
// feed) passed to strings.TrimLeft to strip leading whitespace from text
// tokens.
const whitespace = " \t\r\n\f"
505
506 // Section 12.2.6.4.1.
507 func initialIM(p *parser) bool {
508         switch p.tok.Type {
509         case TextToken:
510                 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
511                 if len(p.tok.Data) == 0 {
512                         // It was all whitespace, so ignore it.
513                         return true
514                 }
515         case CommentToken:
516                 p.doc.AppendChild(&Node{
517                         Type: CommentNode,
518                         Data: p.tok.Data,
519                 })
520                 return true
521         case DoctypeToken:
522                 n, quirks := parseDoctype(p.tok.Data)
523                 p.doc.AppendChild(n)
524                 p.quirks = quirks
525                 p.im = beforeHTMLIM
526                 return true
527         }
528         p.quirks = true
529         p.im = beforeHTMLIM
530         return false
531 }
532
533 // Section 12.2.6.4.2.
534 func beforeHTMLIM(p *parser) bool {
535         switch p.tok.Type {
536         case DoctypeToken:
537                 // Ignore the token.
538                 return true
539         case TextToken:
540                 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
541                 if len(p.tok.Data) == 0 {
542                         // It was all whitespace, so ignore it.
543                         return true
544                 }
545         case StartTagToken:
546                 if p.tok.DataAtom == a.Html {
547                         p.addElement()
548                         p.im = beforeHeadIM
549                         return true
550                 }
551         case EndTagToken:
552                 switch p.tok.DataAtom {
553                 case a.Head, a.Body, a.Html, a.Br:
554                         p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
555                         return false
556                 default:
557                         // Ignore the token.
558                         return true
559                 }
560         case CommentToken:
561                 p.doc.AppendChild(&Node{
562                         Type: CommentNode,
563                         Data: p.tok.Data,
564                 })
565                 return true
566         }
567         p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
568         return false
569 }
570
571 // Section 12.2.6.4.3.
572 func beforeHeadIM(p *parser) bool {
573         switch p.tok.Type {
574         case TextToken:
575                 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
576                 if len(p.tok.Data) == 0 {
577                         // It was all whitespace, so ignore it.
578                         return true
579                 }
580         case StartTagToken:
581                 switch p.tok.DataAtom {
582                 case a.Head:
583                         p.addElement()
584                         p.head = p.top()
585                         p.im = inHeadIM
586                         return true
587                 case a.Html:
588                         return inBodyIM(p)
589                 }
590         case EndTagToken:
591                 switch p.tok.DataAtom {
592                 case a.Head, a.Body, a.Html, a.Br:
593                         p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
594                         return false
595                 default:
596                         // Ignore the token.
597                         return true
598                 }
599         case CommentToken:
600                 p.addChild(&Node{
601                         Type: CommentNode,
602                         Data: p.tok.Data,
603                 })
604                 return true
605         case DoctypeToken:
606                 // Ignore the token.
607                 return true
608         }
609
610         p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
611         return false
612 }
613
614 // Section 12.2.6.4.4.
615 func inHeadIM(p *parser) bool {
616         switch p.tok.Type {
617         case TextToken:
618                 s := strings.TrimLeft(p.tok.Data, whitespace)
619                 if len(s) < len(p.tok.Data) {
620                         // Add the initial whitespace to the current node.
621                         p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
622                         if s == "" {
623                                 return true
624                         }
625                         p.tok.Data = s
626                 }
627         case StartTagToken:
628                 switch p.tok.DataAtom {
629                 case a.Html:
630                         return inBodyIM(p)
631                 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
632                         p.addElement()
633                         p.oe.pop()
634                         p.acknowledgeSelfClosingTag()
635                         return true
636                 case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
637                         p.addElement()
638                         p.setOriginalIM()
639                         p.im = textIM
640                         return true
641                 case a.Head:
642                         // Ignore the token.
643                         return true
644                 case a.Template:
645                         p.addElement()
646                         p.afe = append(p.afe, &scopeMarker)
647                         p.framesetOK = false
648                         p.im = inTemplateIM
649                         p.templateStack = append(p.templateStack, inTemplateIM)
650                         return true
651                 }
652         case EndTagToken:
653                 switch p.tok.DataAtom {
654                 case a.Head:
655                         p.oe.pop()
656                         p.im = afterHeadIM
657                         return true
658                 case a.Body, a.Html, a.Br:
659                         p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
660                         return false
661                 case a.Template:
662                         if !p.oe.contains(a.Template) {
663                                 return true
664                         }
665                         // TODO: remove this divergence from the HTML5 spec.
666                         //
667                         // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
668                         p.generateImpliedEndTags()
669                         for i := len(p.oe) - 1; i >= 0; i-- {
670                                 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
671                                         p.oe = p.oe[:i]
672                                         break
673                                 }
674                         }
675                         p.clearActiveFormattingElements()
676                         p.templateStack.pop()
677                         p.resetInsertionMode()
678                         return true
679                 default:
680                         // Ignore the token.
681                         return true
682                 }
683         case CommentToken:
684                 p.addChild(&Node{
685                         Type: CommentNode,
686                         Data: p.tok.Data,
687                 })
688                 return true
689         case DoctypeToken:
690                 // Ignore the token.
691                 return true
692         }
693
694         p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
695         return false
696 }
697
698 // Section 12.2.6.4.6.
699 func afterHeadIM(p *parser) bool {
700         switch p.tok.Type {
701         case TextToken:
702                 s := strings.TrimLeft(p.tok.Data, whitespace)
703                 if len(s) < len(p.tok.Data) {
704                         // Add the initial whitespace to the current node.
705                         p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
706                         if s == "" {
707                                 return true
708                         }
709                         p.tok.Data = s
710                 }
711         case StartTagToken:
712                 switch p.tok.DataAtom {
713                 case a.Html:
714                         return inBodyIM(p)
715                 case a.Body:
716                         p.addElement()
717                         p.framesetOK = false
718                         p.im = inBodyIM
719                         return true
720                 case a.Frameset:
721                         p.addElement()
722                         p.im = inFramesetIM
723                         return true
724                 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
725                         p.oe = append(p.oe, p.head)
726                         defer p.oe.remove(p.head)
727                         return inHeadIM(p)
728                 case a.Head:
729                         // Ignore the token.
730                         return true
731                 }
732         case EndTagToken:
733                 switch p.tok.DataAtom {
734                 case a.Body, a.Html, a.Br:
735                         // Drop down to creating an implied <body> tag.
736                 case a.Template:
737                         return inHeadIM(p)
738                 default:
739                         // Ignore the token.
740                         return true
741                 }
742         case CommentToken:
743                 p.addChild(&Node{
744                         Type: CommentNode,
745                         Data: p.tok.Data,
746                 })
747                 return true
748         case DoctypeToken:
749                 // Ignore the token.
750                 return true
751         }
752
753         p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
754         p.framesetOK = true
755         return false
756 }
757
758 // copyAttributes copies attributes of src not found on dst to dst.
759 func copyAttributes(dst *Node, src Token) {
760         if len(src.Attr) == 0 {
761                 return
762         }
763         attr := map[string]string{}
764         for _, t := range dst.Attr {
765                 attr[t.Key] = t.Val
766         }
767         for _, t := range src.Attr {
768                 if _, ok := attr[t.Key]; !ok {
769                         dst.Attr = append(dst.Attr, t)
770                         attr[t.Key] = t.Val
771                 }
772         }
773 }
774
// inBodyIM is the insertion mode handler for the "in body" state. It
// dispatches on the type of the current token p.tok and, for start and end
// tags, on the tag's atom.
// Section 12.2.6.4.7.
//
// Most cases return true. The cases that return false first rewrite p.tok or
// inject an implied token (see a.Image, the end tags a.Br and a.Html, and the
// template branch of ErrorToken), so false presumably asks the caller to
// process p.tok again — confirm against the parse loop, which is not in this
// function.
func inBodyIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		d := p.tok.Data
		switch n := p.oe.top(); n.DataAtom {
		case a.Pre, a.Listing:
			if n.FirstChild == nil {
				// Ignore a newline at the start of a <pre> block.
				if d != "" && d[0] == '\r' {
					d = d[1:]
				}
				if d != "" && d[0] == '\n' {
					d = d[1:]
				}
			}
		}
		// NUL bytes are dropped from body text.
		d = strings.Replace(d, "\x00", "", -1)
		if d == "" {
			return true
		}
		p.reconstructActiveFormattingElements()
		p.addText(d)
		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
			// There were non-whitespace characters inserted.
			p.framesetOK = false
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			// A stray <html> only contributes attributes the root element
			// (p.oe[0]) lacks; inside a template it is ignored.
			if p.oe.contains(a.Template) {
				return true
			}
			copyAttributes(p.oe[0], p.tok)
		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
			// These tags are delegated to the in-head handler.
			return inHeadIM(p)
		case a.Body:
			// A second <body> merges its new attributes into the existing
			// body element, expected at p.oe[1]; inside a template it is
			// ignored.
			if p.oe.contains(a.Template) {
				return true
			}
			if len(p.oe) >= 2 {
				body := p.oe[1]
				if body.Type == ElementNode && body.DataAtom == a.Body {
					p.framesetOK = false
					copyAttributes(body, p.tok)
				}
			}
		case a.Frameset:
			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
				// Ignore the token.
				return true
			}
			// Detach the body element, cut the stack back to the root, and
			// insert the frameset in its place.
			body := p.oe[1]
			if body.Parent != nil {
				body.Parent.RemoveChild(body)
			}
			p.oe = p.oe[:1]
			p.addElement()
			p.im = inFramesetIM
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
			// Block-level start tags first close any <p> in button scope.
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			p.popUntil(buttonScope, a.P)
			// Headings do not nest: a heading on top of the stack is popped
			// before the new one is added.
			switch n := p.top(); n.DataAtom {
			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
				p.oe.pop()
			}
			p.addElement()
		case a.Pre, a.Listing:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// The newline, if any, will be dealt with by the TextToken case.
			p.framesetOK = false
		case a.Form:
			if p.form != nil && !p.oe.contains(a.Template) {
				// Ignore the token
				return true
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// The form element pointer is only set outside of templates.
			if !p.oe.contains(a.Template) {
				p.form = p.top()
			}
		case a.Li:
			p.framesetOK = false
			// Scan the stack of open elements from the top. An open <li>
			// truncates the stack at that element and ends the scan; address,
			// div, p and non-special elements are skipped; any other special
			// element ends the scan without changing the stack.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Li:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Dd, a.Dt:
			p.framesetOK = false
			// Same scan as for <li>, but looking for an open <dd> or <dt>.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Dd, a.Dt:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Plaintext:
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Button:
			// An open <button> in scope is closed before the new one opens.
			p.popUntil(defaultScope, a.Button)
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
		case a.A:
			// Look for an <a> in the active formatting elements, searching
			// back to the last scope marker. If found, it is closed via the
			// end-tag formatting routine and removed from both lists.
			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
					p.inBodyEndTagFormatting(a.A)
					p.oe.remove(n)
					p.afe.remove(n)
					break
				}
			}
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.Nobr:
			p.reconstructActiveFormattingElements()
			// A <nobr> already in scope is closed first.
			if p.elementInScope(defaultScope, a.Nobr) {
				p.inBodyEndTagFormatting(a.Nobr)
				p.reconstructActiveFormattingElements()
			}
			p.addFormattingElement()
		case a.Applet, a.Marquee, a.Object:
			p.reconstructActiveFormattingElements()
			p.addElement()
			// Push a scope marker onto the active formatting elements.
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
		case a.Table:
			// In quirks mode an open <p> is not closed by <table>.
			if !p.quirks {
				p.popUntil(buttonScope, a.P)
			}
			p.addElement()
			p.framesetOK = false
			p.im = inTableIM
			return true
		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
			p.reconstructActiveFormattingElements()
			// The element is added and immediately popped, so it never stays
			// on the stack of open elements.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			if p.tok.DataAtom == a.Input {
				for _, t := range p.tok.Attr {
					if t.Key == "type" {
						if strings.ToLower(t.Val) == "hidden" {
							// Skip setting framesetOK = false
							return true
						}
					}
				}
			}
			p.framesetOK = false
		case a.Param, a.Source, a.Track:
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		case a.Hr:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			p.framesetOK = false
		case a.Image:
			// <image> is rewritten to <img>; the token is not handled here.
			p.tok.DataAtom = a.Img
			p.tok.Data = a.Img.String()
			return false
		case a.Isindex:
			if p.form != nil {
				// Ignore the token.
				return true
			}
			// Split the token's attributes: "action" and "prompt" are pulled
			// out, "name" is dropped (the input is always named "isindex"),
			// and everything else is passed on to the generated <input>.
			action := ""
			prompt := "This is a searchable index. Enter search keywords: "
			attr := []Attribute{{Key: "name", Val: "isindex"}}
			for _, t := range p.tok.Attr {
				switch t.Key {
				case "action":
					action = t.Val
				case "name":
					// Ignore the attribute.
				case "prompt":
					prompt = t.Val
				default:
					attr = append(attr, t)
				}
			}
			p.acknowledgeSelfClosingTag()
			p.popUntil(buttonScope, a.P)
			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
			if p.form == nil {
				// NOTE: The 'isindex' element has been removed,
				// and the 'template' element has not been designed to be
				// collaborative with the index element.
				//
				// Ignore the token.
				return true
			}
			if action != "" {
				p.form.Attr = []Attribute{{Key: "action", Val: action}}
			}
			// Emit <hr><label>prompt<input></label><hr> inside the form and
			// then close the form.
			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
			p.addText(prompt)
			p.addChild(&Node{
				Type:     ElementNode,
				DataAtom: a.Input,
				Data:     a.Input.String(),
				Attr:     attr,
			})
			p.oe.pop()
			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
		case a.Textarea:
			// Switch to textIM after recording the mode to come back to.
			p.addElement()
			p.setOriginalIM()
			p.framesetOK = false
			p.im = textIM
		case a.Xmp:
			p.popUntil(buttonScope, a.P)
			p.reconstructActiveFormattingElements()
			p.framesetOK = false
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Iframe:
			p.framesetOK = false
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Noembed, a.Noscript:
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Select:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
			p.im = inSelectIM
			return true
		case a.Optgroup, a.Option:
			// An <option> on top of the stack is closed first.
			if p.top().DataAtom == a.Option {
				p.oe.pop()
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
		case a.Rb, a.Rtc:
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags()
			}
			p.addElement()
		case a.Rp, a.Rt:
			if p.elementInScope(defaultScope, a.Ruby) {
				// Here "rtc" is passed to generateImpliedEndTags, presumably
				// as an exception that must stay open — confirm against its
				// definition.
				p.generateImpliedEndTags("rtc")
			}
			p.addElement()
		case a.Math, a.Svg:
			p.reconstructActiveFormattingElements()
			// Attribute names are adjusted for the foreign namespace before
			// the element is created.
			if p.tok.DataAtom == a.Math {
				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
			} else {
				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
			}
			adjustForeignAttributes(p.tok.Attr)
			p.addElement()
			// The namespace of the new element is the tag name itself.
			p.top().Namespace = p.tok.Data
			if p.hasSelfClosingToken {
				p.oe.pop()
				p.acknowledgeSelfClosingTag()
			}
			return true
		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
		default:
			p.reconstructActiveFormattingElements()
			p.addElement()
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Body:
			if p.elementInScope(defaultScope, a.Body) {
				p.im = afterBodyIM
			}
		case a.Html:
			// </html> is treated as an implied </body>; the </html> token
			// itself is not handled here when a body is in scope.
			if p.elementInScope(defaultScope, a.Body) {
				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
				return false
			}
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.Form:
			if p.oe.contains(a.Template) {
				// Inside a template the form element pointer is not used;
				// the form is located on the stack of open elements instead.
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if i == -1 {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				if p.oe[i].DataAtom != a.Form {
					// Ignore the token.
					return true
				}
				p.popUntil(defaultScope, a.Form)
			} else {
				// The form element pointer is cleared even when the token
				// ends up being ignored.
				node := p.form
				p.form = nil
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if node == nil || i == -1 || p.oe[i] != node {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				p.oe.remove(node)
			}
		case a.P:
			// A </p> with no <p> in button scope first opens an implied <p>,
			// so that there is one to close.
			if !p.elementInScope(buttonScope, a.P) {
				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
			}
			p.popUntil(buttonScope, a.P)
		case a.Li:
			p.popUntil(listItemScope, a.Li)
		case a.Dd, a.Dt:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			// All six heading atoms are passed, whichever level the end tag
			// names.
			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.inBodyEndTagFormatting(p.tok.DataAtom)
		case a.Applet, a.Marquee, a.Object:
			if p.popUntil(defaultScope, p.tok.DataAtom) {
				p.clearActiveFormattingElements()
			}
		case a.Br:
			// </br> is turned into a <br> start tag; the token is not
			// handled here.
			p.tok.Type = StartTagToken
			return false
		case a.Template:
			return inHeadIM(p)
		default:
			p.inBodyEndTagOther(p.tok.DataAtom)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case ErrorToken:
		// TODO: remove this divergence from the HTML5 spec.
		if len(p.templateStack) > 0 {
			p.im = inTemplateIM
			return false
		} else {
			// Scan the open elements for one outside the listed set. Both
			// outcomes return true, so the scan has no effect visible here.
			for _, e := range p.oe {
				switch e.DataAtom {
				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
					a.Thead, a.Tr, a.Body, a.Html:
				default:
					return true
				}
			}
		}
	}

	return true
}
1166
// inBodyEndTagFormatting handles an end tag for a formatting element (and is
// also called for some start tags that implicitly close one). tagAtom
// identifies the formatting element being closed. It may restructure the
// tree, the stack of open elements (p.oe) and the list of active formatting
// elements (p.afe).
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop.
	// The whole procedure is attempted at most eight times.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element.
		// Search p.afe from the end, stopping at the last scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: treat it as an ordinary end tag.
			p.inBodyEndTagOther(tagAtom)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The element is no longer open; just drop it from p.afe.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block.
		// This is the first special element at or above feIndex on the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// Simple case: pop everything up to and including the formatting
			// element, then remove it from p.afe.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// The common ancestor is the element directly below the formatting
		// element on the stack; bookmark is an index into p.afe.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		// NOTE(review): this loop ends after three iterations even when
		// formattingElement has not been reached. The linked algorithm
		// appears to keep walking down the stack and to use the counter only
		// to decide when nodes are dropped from the active formatting list —
		// confirm against the current spec text.
		for j := 0; j < 3; j++ {
			// Step 13.3.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5.
			// Nodes that are not active formatting elements are removed from
			// the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7.
			// Replace node by a clone in both p.afe and p.oe.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9.
			// Move lastNode under the clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		case a.Template:
			// TODO: remove namespace checking
			// For a template whose Namespace is "html", the append target
			// becomes its last child.
			if commonAncestor.Namespace == "html" {
				commonAncestor = commonAncestor.LastChild
			}
			fallthrough
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements.
		// The clone goes directly above the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1296
1297 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1298 // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1299 // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1300 func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
1301         for i := len(p.oe) - 1; i >= 0; i-- {
1302                 if p.oe[i].DataAtom == tagAtom {
1303                         p.oe = p.oe[:i]
1304                         break
1305                 }
1306                 if isSpecialElement(p.oe[i]) {
1307                         break
1308                 }
1309         }
1310 }
1311
1312 // Section 12.2.6.4.8.
1313 func textIM(p *parser) bool {
1314         switch p.tok.Type {
1315         case ErrorToken:
1316                 p.oe.pop()
1317         case TextToken:
1318                 d := p.tok.Data
1319                 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1320                         // Ignore a newline at the start of a <textarea> block.
1321                         if d != "" && d[0] == '\r' {
1322                                 d = d[1:]
1323                         }
1324                         if d != "" && d[0] == '\n' {
1325                                 d = d[1:]
1326                         }
1327                 }
1328                 if d == "" {
1329                         return true
1330                 }
1331                 p.addText(d)
1332                 return true
1333         case EndTagToken:
1334                 p.oe.pop()
1335         }
1336         p.im = p.originalIM
1337         p.originalIM = nil
1338         return p.tok.Type == EndTagToken
1339 }
1340
// inTableIM is the insertion mode handler for the "in table" state.
// Section 12.2.6.4.9.
//
// Tokens with table-specific handling are dealt with in the switch below and
// return from there. Every token that leaves the switch without returning is
// handed to inBodyIM with p.fosterParenting set for the duration of that
// call.
func inTableIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// NUL bytes are stripped from the token itself, so the change is
		// also seen by inBodyIM if the token falls through.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
		switch p.oe.top().DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			// Whitespace-only text directly inside table structure is
			// inserted as is; any other text falls through to the
			// foster-parenting path at the bottom.
			if strings.Trim(p.tok.Data, whitespace) == "" {
				p.addText(p.tok.Data)
				return true
			}
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Caption:
			p.clearStackToContext(tableScope)
			// Push a scope marker onto the active formatting elements.
			p.afe = append(p.afe, &scopeMarker)
			p.addElement()
			p.im = inCaptionIM
			return true
		case a.Colgroup:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inColumnGroupIM
			return true
		case a.Col:
			// A bare <col> gets an implied <colgroup>; the <col> token
			// itself is not handled here.
			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
			return false
		case a.Tbody, a.Tfoot, a.Thead:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inTableBodyIM
			return true
		case a.Td, a.Th, a.Tr:
			// Rows and cells get an implied <tbody>; the token itself is
			// not handled here.
			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
			return false
		case a.Table:
			// A nested <table> start tag closes the table that is currently
			// open, if one is in table scope; the token itself is not
			// handled here in that case.
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return false
			}
			// Ignore the token.
			return true
		case a.Style, a.Script, a.Template:
			return inHeadIM(p)
		case a.Input:
			// Only <input type=hidden> (value compared case-insensitively)
			// is inserted in place; it is added and immediately popped.
			for _, t := range p.tok.Attr {
				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
					p.addElement()
					p.oe.pop()
					return true
				}
			}
			// Otherwise drop down to the default action.
		case a.Form:
			if p.oe.contains(a.Template) || p.form != nil {
				// Ignore the token.
				return true
			}
			// The form is added and immediately popped, but remembered as
			// the form element pointer. Execution then continues to the
			// in-body call at the bottom.
			p.addElement()
			p.form = p.oe.pop()
		case a.Select:
			p.reconstructActiveFormattingElements()
			// Foster parenting is enabled only while the <select> itself is
			// inserted, and only when the current node is table structure.
			switch p.top().DataAtom {
			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
				p.fosterParenting = true
			}
			p.addElement()
			p.fosterParenting = false
			p.framesetOK = false
			p.im = inSelectInTableIM
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Table:
			// Both outcomes return true; the insertion mode is reset only
			// if a table was actually popped.
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return true
			}
			// Ignore the token.
			return true
		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
			return true
		case a.Template:
			return inHeadIM(p)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	case ErrorToken:
		// End of input is handled by the in-body rules without foster
		// parenting.
		return inBodyIM(p)
	}

	// Anything else: process the token using the in-body rules with foster
	// parenting enabled. The deferred func switches it off again when this
	// function returns.
	p.fosterParenting = true
	defer func() { p.fosterParenting = false }()

	return inBodyIM(p)
}
1447
1448 // Section 12.2.6.4.11.
1449 func inCaptionIM(p *parser) bool {
1450         switch p.tok.Type {
1451         case StartTagToken:
1452                 switch p.tok.DataAtom {
1453                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1454                         if p.popUntil(tableScope, a.Caption) {
1455                                 p.clearActiveFormattingElements()
1456                                 p.im = inTableIM
1457                                 return false
1458                         } else {
1459                                 // Ignore the token.
1460                                 return true
1461                         }
1462                 case a.Select:
1463                         p.reconstructActiveFormattingElements()
1464                         p.addElement()
1465                         p.framesetOK = false
1466                         p.im = inSelectInTableIM
1467                         return true
1468                 }
1469         case EndTagToken:
1470                 switch p.tok.DataAtom {
1471                 case a.Caption:
1472                         if p.popUntil(tableScope, a.Caption) {
1473                                 p.clearActiveFormattingElements()
1474                                 p.im = inTableIM
1475                         }
1476                         return true
1477                 case a.Table:
1478                         if p.popUntil(tableScope, a.Caption) {
1479                                 p.clearActiveFormattingElements()
1480                                 p.im = inTableIM
1481                                 return false
1482                         } else {
1483                                 // Ignore the token.
1484                                 return true
1485                         }
1486                 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1487                         // Ignore the token.
1488                         return true
1489                 }
1490         }
1491         return inBodyIM(p)
1492 }
1493
1494 // Section 12.2.6.4.12.
1495 func inColumnGroupIM(p *parser) bool {
1496         switch p.tok.Type {
1497         case TextToken:
1498                 s := strings.TrimLeft(p.tok.Data, whitespace)
1499                 if len(s) < len(p.tok.Data) {
1500                         // Add the initial whitespace to the current node.
1501                         p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1502                         if s == "" {
1503                                 return true
1504                         }
1505                         p.tok.Data = s
1506                 }
1507         case CommentToken:
1508                 p.addChild(&Node{
1509                         Type: CommentNode,
1510                         Data: p.tok.Data,
1511                 })
1512                 return true
1513         case DoctypeToken:
1514                 // Ignore the token.
1515                 return true
1516         case StartTagToken:
1517                 switch p.tok.DataAtom {
1518                 case a.Html:
1519                         return inBodyIM(p)
1520                 case a.Col:
1521                         p.addElement()
1522                         p.oe.pop()
1523                         p.acknowledgeSelfClosingTag()
1524                         return true
1525                 case a.Template:
1526                         return inHeadIM(p)
1527                 }
1528         case EndTagToken:
1529                 switch p.tok.DataAtom {
1530                 case a.Colgroup:
1531                         if p.oe.top().DataAtom == a.Colgroup {
1532                                 p.oe.pop()
1533                                 p.im = inTableIM
1534                         }
1535                         return true
1536                 case a.Col:
1537                         // Ignore the token.
1538                         return true
1539                 case a.Template:
1540                         return inHeadIM(p)
1541                 }
1542         case ErrorToken:
1543                 return inBodyIM(p)
1544         }
1545         if p.oe.top().DataAtom != a.Colgroup {
1546                 return true
1547         }
1548         p.oe.pop()
1549         p.im = inTableIM
1550         return false
1551 }
1552
1553 // Section 12.2.6.4.13.
1554 func inTableBodyIM(p *parser) bool {
1555         switch p.tok.Type {
1556         case StartTagToken:
1557                 switch p.tok.DataAtom {
1558                 case a.Tr:
1559                         p.clearStackToContext(tableBodyScope)
1560                         p.addElement()
1561                         p.im = inRowIM
1562                         return true
1563                 case a.Td, a.Th:
1564                         p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1565                         return false
1566                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1567                         if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1568                                 p.im = inTableIM
1569                                 return false
1570                         }
1571                         // Ignore the token.
1572                         return true
1573                 }
1574         case EndTagToken:
1575                 switch p.tok.DataAtom {
1576                 case a.Tbody, a.Tfoot, a.Thead:
1577                         if p.elementInScope(tableScope, p.tok.DataAtom) {
1578                                 p.clearStackToContext(tableBodyScope)
1579                                 p.oe.pop()
1580                                 p.im = inTableIM
1581                         }
1582                         return true
1583                 case a.Table:
1584                         if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1585                                 p.im = inTableIM
1586                                 return false
1587                         }
1588                         // Ignore the token.
1589                         return true
1590                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1591                         // Ignore the token.
1592                         return true
1593                 }
1594         case CommentToken:
1595                 p.addChild(&Node{
1596                         Type: CommentNode,
1597                         Data: p.tok.Data,
1598                 })
1599                 return true
1600         }
1601
1602         return inTableIM(p)
1603 }
1604
1605 // Section 12.2.6.4.14.
1606 func inRowIM(p *parser) bool {
1607         switch p.tok.Type {
1608         case StartTagToken:
1609                 switch p.tok.DataAtom {
1610                 case a.Td, a.Th:
1611                         p.clearStackToContext(tableRowScope)
1612                         p.addElement()
1613                         p.afe = append(p.afe, &scopeMarker)
1614                         p.im = inCellIM
1615                         return true
1616                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1617                         if p.popUntil(tableScope, a.Tr) {
1618                                 p.im = inTableBodyIM
1619                                 return false
1620                         }
1621                         // Ignore the token.
1622                         return true
1623                 }
1624         case EndTagToken:
1625                 switch p.tok.DataAtom {
1626                 case a.Tr:
1627                         if p.popUntil(tableScope, a.Tr) {
1628                                 p.im = inTableBodyIM
1629                                 return true
1630                         }
1631                         // Ignore the token.
1632                         return true
1633                 case a.Table:
1634                         if p.popUntil(tableScope, a.Tr) {
1635                                 p.im = inTableBodyIM
1636                                 return false
1637                         }
1638                         // Ignore the token.
1639                         return true
1640                 case a.Tbody, a.Tfoot, a.Thead:
1641                         if p.elementInScope(tableScope, p.tok.DataAtom) {
1642                                 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1643                                 return false
1644                         }
1645                         // Ignore the token.
1646                         return true
1647                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1648                         // Ignore the token.
1649                         return true
1650                 }
1651         }
1652
1653         return inTableIM(p)
1654 }
1655
1656 // Section 12.2.6.4.15.
1657 func inCellIM(p *parser) bool {
1658         switch p.tok.Type {
1659         case StartTagToken:
1660                 switch p.tok.DataAtom {
1661                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1662                         if p.popUntil(tableScope, a.Td, a.Th) {
1663                                 // Close the cell and reprocess.
1664                                 p.clearActiveFormattingElements()
1665                                 p.im = inRowIM
1666                                 return false
1667                         }
1668                         // Ignore the token.
1669                         return true
1670                 case a.Select:
1671                         p.reconstructActiveFormattingElements()
1672                         p.addElement()
1673                         p.framesetOK = false
1674                         p.im = inSelectInTableIM
1675                         return true
1676                 }
1677         case EndTagToken:
1678                 switch p.tok.DataAtom {
1679                 case a.Td, a.Th:
1680                         if !p.popUntil(tableScope, p.tok.DataAtom) {
1681                                 // Ignore the token.
1682                                 return true
1683                         }
1684                         p.clearActiveFormattingElements()
1685                         p.im = inRowIM
1686                         return true
1687                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1688                         // Ignore the token.
1689                         return true
1690                 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1691                         if !p.elementInScope(tableScope, p.tok.DataAtom) {
1692                                 // Ignore the token.
1693                                 return true
1694                         }
1695                         // Close the cell and reprocess.
1696                         p.popUntil(tableScope, a.Td, a.Th)
1697                         p.clearActiveFormattingElements()
1698                         p.im = inRowIM
1699                         return false
1700                 }
1701         }
1702         return inBodyIM(p)
1703 }
1704
1705 // Section 12.2.6.4.16.
1706 func inSelectIM(p *parser) bool {
1707         switch p.tok.Type {
1708         case TextToken:
1709                 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1710         case StartTagToken:
1711                 switch p.tok.DataAtom {
1712                 case a.Html:
1713                         return inBodyIM(p)
1714                 case a.Option:
1715                         if p.top().DataAtom == a.Option {
1716                                 p.oe.pop()
1717                         }
1718                         p.addElement()
1719                 case a.Optgroup:
1720                         if p.top().DataAtom == a.Option {
1721                                 p.oe.pop()
1722                         }
1723                         if p.top().DataAtom == a.Optgroup {
1724                                 p.oe.pop()
1725                         }
1726                         p.addElement()
1727                 case a.Select:
1728                         p.tok.Type = EndTagToken
1729                         return false
1730                 case a.Input, a.Keygen, a.Textarea:
1731                         if p.elementInScope(selectScope, a.Select) {
1732                                 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1733                                 return false
1734                         }
1735                         // In order to properly ignore <textarea>, we need to change the tokenizer mode.
1736                         p.tokenizer.NextIsNotRawText()
1737                         // Ignore the token.
1738                         return true
1739                 case a.Script, a.Template:
1740                         return inHeadIM(p)
1741                 }
1742         case EndTagToken:
1743                 switch p.tok.DataAtom {
1744                 case a.Option:
1745                         if p.top().DataAtom == a.Option {
1746                                 p.oe.pop()
1747                         }
1748                 case a.Optgroup:
1749                         i := len(p.oe) - 1
1750                         if p.oe[i].DataAtom == a.Option {
1751                                 i--
1752                         }
1753                         if p.oe[i].DataAtom == a.Optgroup {
1754                                 p.oe = p.oe[:i]
1755                         }
1756                 case a.Select:
1757                         if p.popUntil(selectScope, a.Select) {
1758                                 p.resetInsertionMode()
1759                         }
1760                 case a.Template:
1761                         return inHeadIM(p)
1762                 }
1763         case CommentToken:
1764                 p.addChild(&Node{
1765                         Type: CommentNode,
1766                         Data: p.tok.Data,
1767                 })
1768         case DoctypeToken:
1769                 // Ignore the token.
1770                 return true
1771         case ErrorToken:
1772                 return inBodyIM(p)
1773         }
1774
1775         return true
1776 }
1777
1778 // Section 12.2.6.4.17.
1779 func inSelectInTableIM(p *parser) bool {
1780         switch p.tok.Type {
1781         case StartTagToken, EndTagToken:
1782                 switch p.tok.DataAtom {
1783                 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1784                         if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
1785                                 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1786                                 return false
1787                         } else {
1788                                 // Ignore the token.
1789                                 return true
1790                         }
1791                 }
1792         }
1793         return inSelectIM(p)
1794 }
1795
1796 // Section 12.2.6.4.18.
1797 func inTemplateIM(p *parser) bool {
1798         switch p.tok.Type {
1799         case TextToken, CommentToken, DoctypeToken:
1800                 return inBodyIM(p)
1801         case StartTagToken:
1802                 switch p.tok.DataAtom {
1803                 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1804                         return inHeadIM(p)
1805                 case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1806                         p.templateStack.pop()
1807                         p.templateStack = append(p.templateStack, inTableIM)
1808                         p.im = inTableIM
1809                         return false
1810                 case a.Col:
1811                         p.templateStack.pop()
1812                         p.templateStack = append(p.templateStack, inColumnGroupIM)
1813                         p.im = inColumnGroupIM
1814                         return false
1815                 case a.Tr:
1816                         p.templateStack.pop()
1817                         p.templateStack = append(p.templateStack, inTableBodyIM)
1818                         p.im = inTableBodyIM
1819                         return false
1820                 case a.Td, a.Th:
1821                         p.templateStack.pop()
1822                         p.templateStack = append(p.templateStack, inRowIM)
1823                         p.im = inRowIM
1824                         return false
1825                 default:
1826                         p.templateStack.pop()
1827                         p.templateStack = append(p.templateStack, inBodyIM)
1828                         p.im = inBodyIM
1829                         return false
1830                 }
1831         case EndTagToken:
1832                 switch p.tok.DataAtom {
1833                 case a.Template:
1834                         return inHeadIM(p)
1835                 default:
1836                         // Ignore the token.
1837                         return true
1838                 }
1839         case ErrorToken:
1840                 if !p.oe.contains(a.Template) {
1841                         // Ignore the token.
1842                         return true
1843                 }
1844                 // TODO: remove this divergence from the HTML5 spec.
1845                 //
1846                 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1847                 p.generateImpliedEndTags()
1848                 for i := len(p.oe) - 1; i >= 0; i-- {
1849                         if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1850                                 p.oe = p.oe[:i]
1851                                 break
1852                         }
1853                 }
1854                 p.clearActiveFormattingElements()
1855                 p.templateStack.pop()
1856                 p.resetInsertionMode()
1857                 return false
1858         }
1859         return false
1860 }
1861
1862 // Section 12.2.6.4.19.
1863 func afterBodyIM(p *parser) bool {
1864         switch p.tok.Type {
1865         case ErrorToken:
1866                 // Stop parsing.
1867                 return true
1868         case TextToken:
1869                 s := strings.TrimLeft(p.tok.Data, whitespace)
1870                 if len(s) == 0 {
1871                         // It was all whitespace.
1872                         return inBodyIM(p)
1873                 }
1874         case StartTagToken:
1875                 if p.tok.DataAtom == a.Html {
1876                         return inBodyIM(p)
1877                 }
1878         case EndTagToken:
1879                 if p.tok.DataAtom == a.Html {
1880                         if !p.fragment {
1881                                 p.im = afterAfterBodyIM
1882                         }
1883                         return true
1884                 }
1885         case CommentToken:
1886                 // The comment is attached to the <html> element.
1887                 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1888                         panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1889                 }
1890                 p.oe[0].AppendChild(&Node{
1891                         Type: CommentNode,
1892                         Data: p.tok.Data,
1893                 })
1894                 return true
1895         }
1896         p.im = inBodyIM
1897         return false
1898 }
1899
1900 // Section 12.2.6.4.20.
1901 func inFramesetIM(p *parser) bool {
1902         switch p.tok.Type {
1903         case CommentToken:
1904                 p.addChild(&Node{
1905                         Type: CommentNode,
1906                         Data: p.tok.Data,
1907                 })
1908         case TextToken:
1909                 // Ignore all text but whitespace.
1910                 s := strings.Map(func(c rune) rune {
1911                         switch c {
1912                         case ' ', '\t', '\n', '\f', '\r':
1913                                 return c
1914                         }
1915                         return -1
1916                 }, p.tok.Data)
1917                 if s != "" {
1918                         p.addText(s)
1919                 }
1920         case StartTagToken:
1921                 switch p.tok.DataAtom {
1922                 case a.Html:
1923                         return inBodyIM(p)
1924                 case a.Frameset:
1925                         p.addElement()
1926                 case a.Frame:
1927                         p.addElement()
1928                         p.oe.pop()
1929                         p.acknowledgeSelfClosingTag()
1930                 case a.Noframes:
1931                         return inHeadIM(p)
1932                 }
1933         case EndTagToken:
1934                 switch p.tok.DataAtom {
1935                 case a.Frameset:
1936                         if p.oe.top().DataAtom != a.Html {
1937                                 p.oe.pop()
1938                                 if p.oe.top().DataAtom != a.Frameset {
1939                                         p.im = afterFramesetIM
1940                                         return true
1941                                 }
1942                         }
1943                 }
1944         default:
1945                 // Ignore the token.
1946         }
1947         return true
1948 }
1949
1950 // Section 12.2.6.4.21.
1951 func afterFramesetIM(p *parser) bool {
1952         switch p.tok.Type {
1953         case CommentToken:
1954                 p.addChild(&Node{
1955                         Type: CommentNode,
1956                         Data: p.tok.Data,
1957                 })
1958         case TextToken:
1959                 // Ignore all text but whitespace.
1960                 s := strings.Map(func(c rune) rune {
1961                         switch c {
1962                         case ' ', '\t', '\n', '\f', '\r':
1963                                 return c
1964                         }
1965                         return -1
1966                 }, p.tok.Data)
1967                 if s != "" {
1968                         p.addText(s)
1969                 }
1970         case StartTagToken:
1971                 switch p.tok.DataAtom {
1972                 case a.Html:
1973                         return inBodyIM(p)
1974                 case a.Noframes:
1975                         return inHeadIM(p)
1976                 }
1977         case EndTagToken:
1978                 switch p.tok.DataAtom {
1979                 case a.Html:
1980                         p.im = afterAfterFramesetIM
1981                         return true
1982                 }
1983         default:
1984                 // Ignore the token.
1985         }
1986         return true
1987 }
1988
1989 // Section 12.2.6.4.22.
1990 func afterAfterBodyIM(p *parser) bool {
1991         switch p.tok.Type {
1992         case ErrorToken:
1993                 // Stop parsing.
1994                 return true
1995         case TextToken:
1996                 s := strings.TrimLeft(p.tok.Data, whitespace)
1997                 if len(s) == 0 {
1998                         // It was all whitespace.
1999                         return inBodyIM(p)
2000                 }
2001         case StartTagToken:
2002                 if p.tok.DataAtom == a.Html {
2003                         return inBodyIM(p)
2004                 }
2005         case CommentToken:
2006                 p.doc.AppendChild(&Node{
2007                         Type: CommentNode,
2008                         Data: p.tok.Data,
2009                 })
2010                 return true
2011         case DoctypeToken:
2012                 return inBodyIM(p)
2013         }
2014         p.im = inBodyIM
2015         return false
2016 }
2017
2018 // Section 12.2.6.4.23.
2019 func afterAfterFramesetIM(p *parser) bool {
2020         switch p.tok.Type {
2021         case CommentToken:
2022                 p.doc.AppendChild(&Node{
2023                         Type: CommentNode,
2024                         Data: p.tok.Data,
2025                 })
2026         case TextToken:
2027                 // Ignore all text but whitespace.
2028                 s := strings.Map(func(c rune) rune {
2029                         switch c {
2030                         case ' ', '\t', '\n', '\f', '\r':
2031                                 return c
2032                         }
2033                         return -1
2034                 }, p.tok.Data)
2035                 if s != "" {
2036                         p.tok.Data = s
2037                         return inBodyIM(p)
2038                 }
2039         case StartTagToken:
2040                 switch p.tok.DataAtom {
2041                 case a.Html:
2042                         return inBodyIM(p)
2043                 case a.Noframes:
2044                         return inHeadIM(p)
2045                 }
2046         case DoctypeToken:
2047                 return inBodyIM(p)
2048         default:
2049                 // Ignore the token.
2050         }
2051         return true
2052 }
2053
2054 const whitespaceOrNUL = whitespace + "\x00"
2055
2056 // Section 12.2.6.5
2057 func parseForeignContent(p *parser) bool {
2058         switch p.tok.Type {
2059         case TextToken:
2060                 if p.framesetOK {
2061                         p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2062                 }
2063                 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2064                 p.addText(p.tok.Data)
2065         case CommentToken:
2066                 p.addChild(&Node{
2067                         Type: CommentNode,
2068                         Data: p.tok.Data,
2069                 })
2070         case StartTagToken:
2071                 b := breakout[p.tok.Data]
2072                 if p.tok.DataAtom == a.Font {
2073                 loop:
2074                         for _, attr := range p.tok.Attr {
2075                                 switch attr.Key {
2076                                 case "color", "face", "size":
2077                                         b = true
2078                                         break loop
2079                                 }
2080                         }
2081                 }
2082                 if b {
2083                         for i := len(p.oe) - 1; i >= 0; i-- {
2084                                 n := p.oe[i]
2085                                 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2086                                         p.oe = p.oe[:i+1]
2087                                         break
2088                                 }
2089                         }
2090                         return false
2091                 }
2092                 switch p.top().Namespace {
2093                 case "math":
2094                         adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2095                 case "svg":
2096                         // Adjust SVG tag names. The tokenizer lower-cases tag names, but
2097                         // SVG wants e.g. "foreignObject" with a capital second "O".
2098                         if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2099                                 p.tok.DataAtom = a.Lookup([]byte(x))
2100                                 p.tok.Data = x
2101                         }
2102                         adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2103                 default:
2104                         panic("html: bad parser state: unexpected namespace")
2105                 }
2106                 adjustForeignAttributes(p.tok.Attr)
2107                 namespace := p.top().Namespace
2108                 p.addElement()
2109                 p.top().Namespace = namespace
2110                 if namespace != "" {
2111                         // Don't let the tokenizer go into raw text mode in foreign content
2112                         // (e.g. in an SVG <title> tag).
2113                         p.tokenizer.NextIsNotRawText()
2114                 }
2115                 if p.hasSelfClosingToken {
2116                         p.oe.pop()
2117                         p.acknowledgeSelfClosingTag()
2118                 }
2119         case EndTagToken:
2120                 for i := len(p.oe) - 1; i >= 0; i-- {
2121                         if p.oe[i].Namespace == "" {
2122                                 return p.im(p)
2123                         }
2124                         if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2125                                 p.oe = p.oe[:i]
2126                                 break
2127                         }
2128                 }
2129                 return true
2130         default:
2131                 // Ignore the token.
2132         }
2133         return true
2134 }
2135
2136 // Section 12.2.6.
2137 func (p *parser) inForeignContent() bool {
2138         if len(p.oe) == 0 {
2139                 return false
2140         }
2141         n := p.oe[len(p.oe)-1]
2142         if n.Namespace == "" {
2143                 return false
2144         }
2145         if mathMLTextIntegrationPoint(n) {
2146                 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2147                         return false
2148                 }
2149                 if p.tok.Type == TextToken {
2150                         return false
2151                 }
2152         }
2153         if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2154                 return false
2155         }
2156         if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2157                 return false
2158         }
2159         if p.tok.Type == ErrorToken {
2160                 return false
2161         }
2162         return true
2163 }
2164
2165 // parseImpliedToken parses a token as though it had appeared in the parser's
2166 // input.
2167 func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2168         realToken, selfClosing := p.tok, p.hasSelfClosingToken
2169         p.tok = Token{
2170                 Type:     t,
2171                 DataAtom: dataAtom,
2172                 Data:     data,
2173         }
2174         p.hasSelfClosingToken = false
2175         p.parseCurrentToken()
2176         p.tok, p.hasSelfClosingToken = realToken, selfClosing
2177 }
2178
2179 // parseCurrentToken runs the current token through the parsing routines
2180 // until it is consumed.
2181 func (p *parser) parseCurrentToken() {
2182         if p.tok.Type == SelfClosingTagToken {
2183                 p.hasSelfClosingToken = true
2184                 p.tok.Type = StartTagToken
2185         }
2186
2187         consumed := false
2188         for !consumed {
2189                 if p.inForeignContent() {
2190                         consumed = parseForeignContent(p)
2191                 } else {
2192                         consumed = p.im(p)
2193                 }
2194         }
2195
2196         if p.hasSelfClosingToken {
2197                 // This is a parse error, but ignore it.
2198                 p.hasSelfClosingToken = false
2199         }
2200 }
2201
2202 func (p *parser) parse() error {
2203         // Iterate until EOF. Any other error will cause an early return.
2204         var err error
2205         for err != io.EOF {
2206                 // CDATA sections are allowed only in foreign content.
2207                 n := p.oe.top()
2208                 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2209                 // Read and parse the next token.
2210                 p.tokenizer.Next()
2211                 p.tok = p.tokenizer.Token()
2212                 if p.tok.Type == ErrorToken {
2213                         err = p.tokenizer.Err()
2214                         if err != nil && err != io.EOF {
2215                                 return err
2216                         }
2217                 }
2218                 p.parseCurrentToken()
2219         }
2220         return nil
2221 }
2222
2223 // Parse returns the parse tree for the HTML from the given Reader.
2224 //
2225 // It implements the HTML5 parsing algorithm
2226 // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2227 // which is very complicated. The resultant tree can contain implicitly created
2228 // nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2229 // differ from the nesting implied by a naive processing of start and end
2230 // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2231 // with no corresponding node in the resulting tree.
2232 //
2233 // The input is assumed to be UTF-8 encoded.
2234 func Parse(r io.Reader) (*Node, error) {
2235         p := &parser{
2236                 tokenizer: NewTokenizer(r),
2237                 doc: &Node{
2238                         Type: DocumentNode,
2239                 },
2240                 scripting:  true,
2241                 framesetOK: true,
2242                 im:         initialIM,
2243         }
2244         err := p.parse()
2245         if err != nil {
2246                 return nil, err
2247         }
2248         return p.doc, nil
2249 }
2250
2251 // ParseFragment parses a fragment of HTML and returns the nodes that were
2252 // found. If the fragment is the InnerHTML for an existing element, pass that
2253 // element in context.
2254 //
2255 // It has the same intricacies as Parse.
2256 func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2257         contextTag := ""
2258         if context != nil {
2259                 if context.Type != ElementNode {
2260                         return nil, errors.New("html: ParseFragment of non-element Node")
2261                 }
2262                 // The next check isn't just context.DataAtom.String() == context.Data because
2263                 // it is valid to pass an element whose tag isn't a known atom. For example,
2264                 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2265                 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2266                         return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2267                 }
2268                 contextTag = context.DataAtom.String()
2269         }
2270         p := &parser{
2271                 tokenizer: NewTokenizerFragment(r, contextTag),
2272                 doc: &Node{
2273                         Type: DocumentNode,
2274                 },
2275                 scripting: true,
2276                 fragment:  true,
2277                 context:   context,
2278         }
2279
2280         root := &Node{
2281                 Type:     ElementNode,
2282                 DataAtom: a.Html,
2283                 Data:     a.Html.String(),
2284         }
2285         p.doc.AppendChild(root)
2286         p.oe = nodeStack{root}
2287         if context != nil && context.DataAtom == a.Template {
2288                 p.templateStack = append(p.templateStack, inTemplateIM)
2289         }
2290         p.resetInsertionMode()
2291
2292         for n := context; n != nil; n = n.Parent {
2293                 if n.Type == ElementNode && n.DataAtom == a.Form {
2294                         p.form = n
2295                         break
2296                 }
2297         }
2298
2299         err := p.parse()
2300         if err != nil {
2301                 return nil, err
2302         }
2303
2304         parent := p.doc
2305         if context != nil {
2306                 parent = root
2307         }
2308
2309         var result []*Node
2310         for c := parent.FirstChild; c != nil; {
2311                 next := c.NextSibling
2312                 parent.RemoveChild(c)
2313                 result = append(result, c)
2314                 c = next
2315         }
2316         return result, nil
2317 }