2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011, 2014 Apple Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "core/html/parser/HTMLTreeBuilder.h"
30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/MathMLNames.h"
33 #include "core/SVGNames.h"
34 #include "core/XLinkNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/XMLNames.h"
37 #include "core/dom/DocumentFragment.h"
38 #include "core/dom/ElementTraversal.h"
39 #include "core/html/HTMLDocument.h"
40 #include "core/html/HTMLFormElement.h"
41 #include "core/html/parser/AtomicHTMLToken.h"
42 #include "core/html/parser/HTMLDocumentParser.h"
43 #include "core/html/parser/HTMLParserIdioms.h"
44 #include "core/html/parser/HTMLStackItem.h"
45 #include "core/html/parser/HTMLToken.h"
46 #include "core/html/parser/HTMLTokenizer.h"
47 #include "platform/NotImplemented.h"
48 #include "platform/text/PlatformLocale.h"
49 #include "wtf/MainThread.h"
50 #include "wtf/unicode/CharacterNames.h"
54 using namespace HTMLNames;
58 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
60 return isHTMLSpace<UChar>(character) || character == replacementCharacter;
65 static TextPosition uninitializedPositionValue1()
67 return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
70 static inline bool isAllWhitespace(const String& string)
72 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
75 static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
77 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
// Tag-name predicate whose first comparison is against h1Tag.
// NOTE(review): only that first comparison is visible in this excerpt (the
// embedded numbering jumps from 82 to 90); presumably the other numbered
// heading tags are compared on the omitted lines - confirm against the file.
80 static bool isNumberedHeaderTag(const AtomicString& tagName)
82 return tagName == h1Tag
// Tag-name predicate for the caption/col/colgroup group. The comparisons
// visible here are against captionTag and colgroupTag.
// NOTE(review): embedded line 93 is omitted from this excerpt; presumably it
// holds the colTag comparison implied by the function name - confirm.
90 static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
92 return tagName == captionTag
94 || tagName == colgroupTag;
97 static bool isTableCellContextTag(const AtomicString& tagName)
99 return tagName == thTag || tagName == tdTag;
102 static bool isTableBodyContextTag(const AtomicString& tagName)
104 return tagName == tbodyTag
105 || tagName == tfootTag
106 || tagName == theadTag;
// Tag-name predicate for formatting tags other than <a> and <nobr>.
// Comparisons visible here: b, code, font, small, strike, strong.
// NOTE(review): several comparison lines are omitted from this excerpt (the
// embedded numbering skips 112, 114, 116-117 and 121 onwards), so this is not
// the complete list of tags tested.
109 static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
111 return tagName == bTag
113 || tagName == codeTag
115 || tagName == fontTag
118 || tagName == smallTag
119 || tagName == strikeTag
120 || tagName == strongTag
125 static bool isNonAnchorFormattingTag(const AtomicString& tagName)
127 return tagName == nobrTag
128 || isNonAnchorNonNobrFormattingTag(tagName);
131 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
132 static bool isFormattingTag(const AtomicString& tagName)
134 return tagName == aTag || isNonAnchorFormattingTag(tagName);
137 static HTMLFormElement* closestFormAncestor(Element& element)
139 ASSERT(isMainThread());
140 return Traversal<HTMLFormElement>::firstAncestorOrSelf(element);
// Read cursor over the characters of a character token (or of a String).
// m_current indexes the character currently being examined and m_end is the
// length of the data; the skip*/take*/give* helpers consume characters by
// advancing m_current, and isEmpty() reports when m_current has reached m_end.
// NOTE(review): this excerpt omits a number of short lines of the class (the
// embedded numbering skips values), so access specifiers, some member
// initialisers and declarations, and a few statements are not visible here.
143 class HTMLTreeBuilder::CharacterTokenBuffer {
144 WTF_MAKE_NONCOPYABLE(CharacterTokenBuffer);
// Wraps the characters carried by |token|; m_end is their length.
146 explicit CharacterTokenBuffer(AtomicHTMLToken* token)
147 : m_characters(token->characters().impl())
149 , m_end(token->characters().length())
// Wraps an arbitrary string; m_end is its length.
154 explicit CharacterTokenBuffer(const String& characters)
155 : m_characters(characters.impl())
157 , m_end(characters.length())
// NOTE(review): the destructor body is not visible in this excerpt.
162 ~CharacterTokenBuffer()
// True once the cursor has reached the end of the characters.
167 bool isEmpty() const { return m_current == m_end; }
// Tests the current character for '\n'. The statement guarded by the test is
// omitted from this excerpt; presumably it advances the cursor - confirm.
169 void skipAtMostOneLeadingNewline()
172 if ((*m_characters)[m_current] == '\n')
// Skips leading characters accepted by isHTMLSpace<UChar>().
176 void skipLeadingWhitespace()
178 skipLeading<isHTMLSpace<UChar> >();
// Consumes and returns the leading characters accepted by isHTMLSpace<UChar>().
181 String takeLeadingWhitespace()
183 return takeLeading<isHTMLSpace<UChar> >();
// Skips leading characters accepted by isNotHTMLSpace<UChar>().
186 void skipLeadingNonWhitespace()
188 skipLeading<isNotHTMLSpace<UChar> >();
// Returns the characters from the cursor to the end as a String.
// NOTE(review): the statement that moves the cursor is not visible here.
191 String takeRemaining()
194 unsigned start = m_current;
196 // Notice that substring is smart enough to return *this when start == 0.
197 return String(m_characters->substring(start, m_end - start));
// Appends the unread characters to |recipient|, reading from the 8-bit or the
// 16-bit character buffer according to is8Bit().
200 void giveRemainingTo(StringBuilder& recipient)
202 if (m_characters->is8Bit())
203 recipient.append(m_characters->characters8() + m_current, m_end - m_current);
205 recipient.append(m_characters->characters16() + m_current, m_end - m_current);
// Consumes everything that is left and returns a string built from the
// remaining characters that are HTML whitespace.
// NOTE(review): the declaration and update of |length|, the early return the
// comment below refers to, and the append inside the second loop fall on lines
// omitted from this excerpt.
209 String takeRemainingWhitespace()
212 const unsigned start = m_current;
213 m_current = m_end; // One way or another, we're taking everything!
// First pass over [start, m_end): test each character for whitespace.
216 for (unsigned i = start; i < m_end; ++i) {
217 if (isHTMLSpace<UChar>((*m_characters)[i]))
220 // Returning the null string when there aren't any whitespace
221 // characters is slightly cleaner semantically because we don't want
222 // to insert a text node (as opposed to inserting an empty text node).
// Fast path: when the whole [start, m_end) range is whitespace the original
// characters are returned as a substring instead of being rebuilt one by one.
// The span length is m_end - start; the operands must stay in this order
// because both are unsigned and start - m_end would wrap around.
225 if (length == m_end - start) // It's all whitespace.
226 return String(m_characters->substring(start, m_end - start));
// Slow path: build the result from the whitespace characters only.
228 StringBuilder result;
229 result.reserveCapacity(length);
230 for (unsigned i = start; i < m_end; ++i) {
231 UChar c = (*m_characters)[i];
232 if (isHTMLSpace<UChar>(c))
236 return result.toString();
// Helper templated on a character predicate: advances m_current while the
// predicate accepts the current character, checking for m_end after each step.
// NOTE(review): its declaration line is omitted; presumably this is the
// skipLeading<>() used by the members above - confirm.
240 template<bool characterPredicate(UChar)>
244 while (characterPredicate((*m_characters)[m_current])) {
245 if (++m_current == m_end)
// Helper templated on a character predicate: skips the leading run accepted by
// the predicate and returns it as a substring.
// NOTE(review): its declaration line and the statement taken when nothing was
// skipped (start == m_current) are omitted from this excerpt; presumably this
// is the takeLeading<>() used above - confirm.
250 template<bool characterPredicate(UChar)>
254 const unsigned start = m_current;
255 skipLeading<characterPredicate>();
256 if (start == m_current)
258 return String(m_characters->substring(start, m_current - start));
// The characters being read. The cursor and end members used above are
// declared on lines omitted from this excerpt.
261 RefPtr<StringImpl> m_characters;
// Constructs a tree builder that parses into |document|. The fourth (bool)
// parameter is unnamed and therefore unused by this constructor.
// NOTE(review): part of the member-initialiser list and the whole body are
// omitted from this excerpt (the embedded numbering skips 267-270, 275 and
// 277 onwards).
266 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool, const HTMLParserOptions& options)
// The construction site is created for |document| with the given policy.
271 , m_tree(document, parserContentPolicy)
// Both the current and the saved insertion mode start at InitialMode.
272 , m_insertionMode(InitialMode)
273 , m_originalInsertionMode(InitialMode)
274 , m_shouldSkipLeadingNewline(false)
// The script start position starts out as the value returned by
// uninitializedPositionValue1().
276 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
// Constructs a tree builder for fragment parsing: content is parsed into
// |fragment| in the context of |contextElement|, which must be non-null.
// NOTE(review): part of the member-initialiser list is omitted from this
// excerpt (the embedded numbering skips 284-287, 293 and 295-296).
281 // FIXME: Member variables should be grouped into self-initializing structs to
282 // minimize code duplication between these constructors.
283 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
288 , m_fragmentContext(fragment, contextElement)
289 , m_tree(fragment, parserContentPolicy)
290 , m_insertionMode(InitialMode)
291 , m_originalInsertionMode(InitialMode)
292 , m_shouldSkipLeadingNewline(false)
294 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
297 ASSERT(isMainThread());
298 ASSERT(contextElement);
300 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
301 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
302 // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
303 // and instead use the DocumentFragment as a root node.
304 m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment, HTMLStackItem::ItemForDocumentFragmentNode));
// A <template> context element starts with one template insertion mode pushed.
306 if (isHTMLTemplateElement(*contextElement))
307 m_templateInsertionModes.append(TemplateContentsMode);
// Choose the starting insertion mode, then record the form returned by
// closestFormAncestor() for the context element on the construction site.
309 resetInsertionModeAppropriately();
310 m_tree.setForm(closestFormAncestor(*contextElement));
// Destructor. NOTE(review): the body is not visible in this excerpt.
313 HTMLTreeBuilder::~HTMLTreeBuilder()
// Reports the fragment context, the construction site, the parser and the
// pending script element to |visitor|.
317 void HTMLTreeBuilder::trace(Visitor* visitor)
319 visitor->trace(m_fragmentContext);
320 visitor->trace(m_tree);
321 visitor->trace(m_parser);
322 visitor->trace(m_scriptToProcess);
// Marks the builder as no longer attached (m_isAttached = false).
// NOTE(review): the statements surrounding the assignment are omitted from
// this excerpt; the trailing comment suggests the construction site is
// detached rather than cleared - confirm against the full file.
325 void HTMLTreeBuilder::detach()
328 // This call makes little sense in fragment mode, but for consistency
329 // DocumentParser expects detach() to always be called before it's destroyed.
330 m_isAttached = false;
332 // HTMLConstructionSite might be on the callstack when detach() is called
333 // otherwise we'd just call m_tree.clear() here instead.
// Default constructor: no fragment (m_fragment is null).
337 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
338 : m_fragment(nullptr)
// Records |fragment| and creates the stack item that represents
// |contextElement| (flagged ItemForContextElement). The fragment is expected
// to have no children yet.
342 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement)
343 : m_fragment(fragment)
345 ASSERT(!fragment->hasChildren());
346 m_contextElementStackItem = HTMLStackItem::create(contextElement, HTMLStackItem::ItemForContextElement);
// Destructor. NOTE(review): the body is not visible in this excerpt.
349 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
// Reports the fragment and the context element's stack item to |visitor|.
353 void HTMLTreeBuilder::FragmentParsingContext::trace(Visitor* visitor)
355 visitor->trace(m_fragment);
356 visitor->trace(m_contextElementStackItem);
// Hands the pending script element to the caller and writes its recorded
// start position to |scriptStartPosition|. The stored position is then reset
// to uninitializedPositionValue1() and m_scriptToProcess is released.
// Requires a pending script and no pending construction-site tasks.
359 PassRefPtrWillBeRawPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
361 ASSERT(m_scriptToProcess);
362 ASSERT(!m_tree.hasPendingTasks());
363 // Unpause ourselves, callers may pause us again when processing the script.
364 // The HTML5 spec is written as though scripts are executed inside the tree
365 // builder. We pause the parser to exit the tree builder, and then resume
366 // before running scripts.
367 scriptStartPosition = m_scriptToProcessStartPosition;
368 m_scriptToProcessStartPosition = uninitializedPositionValue1();
369 return m_scriptToProcess.release();
// Entry point for one token. Tokens for which
// shouldProcessTokenInForeignContent() is true go to
// processTokenInForeignContent(). Afterwards the tokenizer flags are refreshed
// and the construction site's queued tasks are executed.
// NOTE(review): the alternative branch for non-foreign tokens (embedded lines
// 376-377) is omitted from this excerpt.
372 void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
374 if (shouldProcessTokenInForeignContent(token))
375 processTokenInForeignContent(token);
// Only when the parser has a tokenizer: recompute whether the adjusted current
// node is foreign content (not in the HTML namespace and not an HTML or MathML
// text integration point) and update the tokenizer accordingly.
379 if (m_parser->tokenizer()) {
380 bool inForeignContent = false;
381 if (!m_tree.isEmpty()) {
382 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
383 inForeignContent = !adjustedCurrentNode->isInHTMLNamespace()
384 && !HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)
385 && !HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode);
// Null-character replacement is forced in TextMode or in foreign content;
// CDATA is allowed only in foreign content.
388 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
389 m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
392 m_tree.executeQueuedTasks();
393 // We might be detached now.
// Dispatches |token| to the handler for its type. Character tokens go to
// processCharacter(); for the other types pending text is flushed and
// m_shouldSkipLeadingNewline is cleared before the type-specific handler runs.
// NOTE(review): the statements that end each branch/case are omitted from
// this excerpt.
396 void HTMLTreeBuilder::processToken(AtomicHTMLToken* token)
398 if (token->type() == HTMLToken::Character) {
399 processCharacter(token);
403 // Any non-character token needs to cause us to flush any pending text immediately.
404 // NOTE: flush() can cause any queued tasks to execute, possibly re-entering the parser.
405 m_tree.flush(FlushAlways);
406 m_shouldSkipLeadingNewline = false;
408 switch (token->type()) {
// Uninitialized and Character tokens are not expected to reach the switch.
409 case HTMLToken::Uninitialized:
410 case HTMLToken::Character:
411 ASSERT_NOT_REACHED();
413 case HTMLToken::DOCTYPE:
414 processDoctypeToken(token);
416 case HTMLToken::StartTag:
417 processStartTag(token);
419 case HTMLToken::EndTag:
420 processEndTag(token);
422 case HTMLToken::Comment:
423 processComment(token);
425 case HTMLToken::EndOfFile:
426 processEndOfFile(token);
// Handles a DOCTYPE token. In InitialMode the doctype is inserted and the
// mode becomes BeforeHTMLMode. In InTableTextMode the pending table text is
// handled by defaultForInTableText() and the token is processed again.
// NOTE(review): the statements that end each branch and whatever follows the
// second branch are omitted from this excerpt.
431 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken* token)
433 ASSERT(token->type() == HTMLToken::DOCTYPE);
434 if (m_insertionMode == InitialMode) {
435 m_tree.insertDoctype(token);
436 setInsertionMode(BeforeHTMLMode);
439 if (m_insertionMode == InTableTextMode) {
440 defaultForInTableText();
441 processDoctypeToken(token);
447 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
449 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
450 AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
451 processStartTag(&fakeToken);
454 void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
456 AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
457 processEndTag(&fakeToken);
460 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
462 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
463 processFakeEndTag(tagName.localName());
// Processes a synthesised </p> end tag; the open-element stack is first
// checked for a p element in button scope.
// NOTE(review): the statement guarded by the check (embedded line 469) is
// omitted from this excerpt; presumably it returns early when no such p
// element exists - confirm.
466 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
468 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
470 AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
471 processEndTag(&endP);
476 bool isLi(const HTMLStackItem* item)
478 return item->hasTagName(liTag);
481 bool isDdOrDt(const HTMLStackItem* item)
483 return item->hasTagName(ddTag)
484 || item->hasTagName(dtTag);
// Shared start-tag handling templated on a shouldClose() predicate (called
// with isLi and isDdOrDt in this file). Clears m_framesetOk, then walks the
// open-element stack from the top record via next(): an item accepted by
// shouldClose() gets a fake end tag, and a special node other than address,
// div or p is tested for separately. Finally any p in button scope is closed
// and the element for |token| is inserted.
// NOTE(review): the loop header and the statements that leave the loop are
// omitted from this excerpt.
489 template <bool shouldClose(const HTMLStackItem*)>
490 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token)
492 m_framesetOk = false;
493 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
495 RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
496 if (shouldClose(item.get())) {
497 ASSERT(item->isElementNode());
498 processFakeEndTag(item->localName());
501 if (item->isSpecialNode() && !item->hasTagName(addressTag) && !item->hasTagName(divTag) && !item->hasTagName(pTag))
503 nodeRecord = nodeRecord->next();
505 processFakePEndTagIfPInButtonScope();
506 m_tree.insertHTMLElement(token);
509 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
511 template <typename TableQualifiedName>
512 static void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, const TableQualifiedName* const* names, size_t length)
514 for (size_t i = 0; i < length; ++i) {
515 const QualifiedName& name = *names[i];
516 const AtomicString& localName = name.localName();
517 AtomicString loweredLocalName = localName.lower();
518 if (loweredLocalName != localName)
519 map->add(loweredLocalName, name);
// Looks the token's name up in a function-static map built from the SVG tag
// table by mapLoweredLocalNameToName(), and sets the token's name to the local
// name of the entry found.
// NOTE(review): the condition guarding the map construction and the statement
// taken when the lookup yields a null local name are omitted from this
// excerpt.
523 static void adjustSVGTagNameCase(AtomicHTMLToken* token)
525 static PrefixedNameToQualifiedNameMap* caseMap = 0;
527 caseMap = new PrefixedNameToQualifiedNameMap;
528 OwnPtr<const SVGQualifiedName*[]> svgTags = SVGNames::getSVGTags();
529 mapLoweredLocalNameToName(caseMap, svgTags.get(), SVGNames::SVGTagsCount);
532 const QualifiedName& casedName = caseMap->get(token->name());
533 if (casedName.localName().isNull())
535 token->setName(casedName.localName());
// Renames the attributes of |token| using a function-static map built from
// the attribute table returned by getAttrs() (with |length| entries). Each
// attribute whose local name has an entry in the map gets that entry as its
// name; the others are left alone. Each template instantiation has its own
// map.
// NOTE(review): the condition guarding the map construction is omitted from
// this excerpt.
538 template<PassOwnPtr<const QualifiedName*[]> getAttrs(), unsigned length>
539 static void adjustAttributes(AtomicHTMLToken* token)
541 static PrefixedNameToQualifiedNameMap* caseMap = 0;
543 caseMap = new PrefixedNameToQualifiedNameMap;
544 OwnPtr<const QualifiedName*[]> attrs = getAttrs();
545 mapLoweredLocalNameToName(caseMap, attrs.get(), length);
548 for (unsigned i = 0; i < token->attributes().size(); ++i) {
549 Attribute& tokenAttribute = token->attributes().at(i);
550 const QualifiedName& casedName = caseMap->get(tokenAttribute.localName());
// A null local name means the map had no entry for this attribute.
551 if (!casedName.localName().isNull())
552 tokenAttribute.parserSetName(casedName);
// Applies adjustAttributes() to |token| using the SVG attribute table.
556 static void adjustSVGAttributes(AtomicHTMLToken* token)
558 adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
// Applies adjustAttributes() to |token| using the MathML attribute table.
561 static void adjustMathMLAttributes(AtomicHTMLToken* token)
563 adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
566 static void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, const QualifiedName* const* names, size_t length)
568 for (size_t i = 0; i < length; ++i) {
569 const QualifiedName* name = names[i];
570 const AtomicString& localName = name->localName();
571 AtomicString prefixColonLocalName = prefix + ':' + localName;
572 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
573 map->add(prefixColonLocalName, nameWithPrefix);
// Renames attributes of |token| using a function-static map. The map holds
// the XLink and XML attribute tables registered under their "xlink:" / "xml:"
// prefixed names, plus entries for xmlns and "xmlns:xlink". Attributes with no
// entry in the map are left alone.
// NOTE(review): the condition guarding the map construction is omitted from
// this excerpt.
577 static void adjustForeignAttributes(AtomicHTMLToken* token)
579 static PrefixedNameToQualifiedNameMap* map = 0;
581 map = new PrefixedNameToQualifiedNameMap;
583 OwnPtr<const QualifiedName*[]> attrs = XLinkNames::getXLinkAttrs();
584 addNamesWithPrefix(map, xlinkAtom, attrs.get(), XLinkNames::XLinkAttrsCount);
586 OwnPtr<const QualifiedName*[]> xmlAttrs = XMLNames::getXMLAttrs();
587 addNamesWithPrefix(map, xmlAtom, xmlAttrs.get(), XMLNames::XMLAttrsCount);
589 map->add(WTF::xmlnsAtom, XMLNSNames::xmlnsAttr);
590 map->add("xmlns:xlink", QualifiedName(xmlnsAtom, xlinkAtom, XMLNSNames::xmlnsNamespaceURI));
593 for (unsigned i = 0; i < token->attributes().size(); ++i) {
594 Attribute& tokenAttribute = token->attributes().at(i);
595 const QualifiedName& name = map->get(tokenAttribute.localName());
// A null local name means the map had no entry for this attribute.
596 if (!name.localName().isNull())
597 tokenAttribute.parserSetName(name);
// Start-tag handling for the "in body" rules: a long chain of tests on the
// token's tag name, each followed by the tree-construction steps for that
// group of tags. The last two statements are the handling for any tag that no
// earlier test claimed.
// NOTE(review): this excerpt omits short lines throughout the function (the
// statement that ends each branch, closing braces and other short statements),
// so the exact control flow between the branches cannot be confirmed from
// here.
601 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
603 ASSERT(token->type() == HTMLToken::StartTag);
604 if (token->name() == htmlTag) {
605 processHtmlStartTagForInBody(token);
// Tags that are handled by the "in head" rules.
608 if (token->name() == baseTag
609 || token->name() == basefontTag
610 || token->name() == bgsoundTag
611 || token->name() == commandTag
612 || token->name() == linkTag
613 || token->name() == metaTag
614 || token->name() == noframesTag
615 || token->name() == scriptTag
616 || token->name() == styleTag
617 || token->name() == titleTag) {
618 bool didProcess = processStartTagForInHead(token);
619 ASSERT_UNUSED(didProcess, didProcess);
// <body>: the stack shapes tested first are only expected while parsing a
// fragment or template contents (see the assert).
622 if (token->name() == bodyTag) {
624 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement() || m_tree.openElements()->hasTemplateInHTMLScope()) {
625 ASSERT(isParsingFragmentOrTemplateContents());
628 m_framesetOk = false;
629 m_tree.insertHTMLBodyStartTagInBody(token);
// <frameset>: removes the body element, pops the stack back to the html
// element, inserts the frameset and switches to InFramesetMode.
632 if (token->name() == framesetTag) {
634 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
635 ASSERT(isParsingFragmentOrTemplateContents());
640 m_tree.openElements()->bodyElement()->remove(ASSERT_NO_EXCEPTION);
641 m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
642 m_tree.openElements()->popHTMLBodyElement();
643 ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
644 m_tree.insertHTMLElement(token);
645 setInsertionMode(InFramesetMode);
// Block-level containers: close a p in button scope, then insert.
648 if (token->name() == addressTag
649 || token->name() == articleTag
650 || token->name() == asideTag
651 || token->name() == blockquoteTag
652 || token->name() == centerTag
653 || token->name() == detailsTag
654 || token->name() == dirTag
655 || token->name() == divTag
656 || token->name() == dlTag
657 || token->name() == fieldsetTag
658 || token->name() == figcaptionTag
659 || token->name() == figureTag
660 || token->name() == footerTag
661 || token->name() == headerTag
662 || token->name() == hgroupTag
663 || token->name() == mainTag
664 || token->name() == menuTag
665 || token->name() == navTag
666 || token->name() == olTag
667 || token->name() == pTag
668 || token->name() == sectionTag
669 || token->name() == summaryTag
670 || token->name() == ulTag) {
671 processFakePEndTagIfPInButtonScope();
672 m_tree.insertHTMLElement(token);
// Numbered headings: a heading that is the current node is popped first.
675 if (isNumberedHeaderTag(token->name())) {
676 processFakePEndTagIfPInButtonScope();
677 if (m_tree.currentStackItem()->isNumberedHeaderElement()) {
679 m_tree.openElements()->pop();
681 m_tree.insertHTMLElement(token);
// <pre>/<listing>: also request that a leading newline be skipped.
684 if (token->name() == preTag || token->name() == listingTag) {
685 processFakePEndTagIfPInButtonScope();
686 m_tree.insertHTMLElement(token);
687 m_shouldSkipLeadingNewline = true;
688 m_framesetOk = false;
// <form>. NOTE(review): the first lines of this branch are omitted.
691 if (token->name() == formTag) {
696 processFakePEndTagIfPInButtonScope();
697 m_tree.insertHTMLFormElement(token);
// List items and definition terms share processCloseWhenNestedTag().
700 if (token->name() == liTag) {
701 processCloseWhenNestedTag<isLi>(token);
704 if (token->name() == ddTag || token->name() == dtTag) {
705 processCloseWhenNestedTag<isDdOrDt>(token);
// <plaintext>: switches the tokenizer (when there is one) to PLAINTEXTState.
708 if (token->name() == plaintextTag) {
709 processFakePEndTagIfPInButtonScope();
710 m_tree.insertHTMLElement(token);
711 if (m_parser->tokenizer())
712 m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
// <button>: a button already in scope is closed with a fake end tag and the
// token is processed again.
715 if (token->name() == buttonTag) {
716 if (m_tree.openElements()->inScope(buttonTag)) {
718 processFakeEndTag(buttonTag);
719 processStartTag(token); // FIXME: Could we just fall through here?
722 m_tree.reconstructTheActiveFormattingElements();
723 m_tree.insertHTMLElement(token);
724 m_framesetOk = false;
// <a>: looks for an active <a> formatting element; the visible statements
// close it and remove it from both lists before the new one is inserted.
// NOTE(review): the test on |activeATag| is on an omitted line.
727 if (token->name() == aTag) {
728 Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
731 processFakeEndTag(aTag);
732 m_tree.activeFormattingElements()->remove(activeATag);
733 if (m_tree.openElements()->contains(activeATag))
734 m_tree.openElements()->remove(activeATag);
736 m_tree.reconstructTheActiveFormattingElements();
737 m_tree.insertFormattingElement(token);
// Other formatting elements.
740 if (isNonAnchorNonNobrFormattingTag(token->name())) {
741 m_tree.reconstructTheActiveFormattingElements();
742 m_tree.insertFormattingElement(token);
// <nobr>: a nobr already in scope is closed with a fake end tag first.
745 if (token->name() == nobrTag) {
746 m_tree.reconstructTheActiveFormattingElements();
747 if (m_tree.openElements()->inScope(nobrTag)) {
749 processFakeEndTag(nobrTag);
750 m_tree.reconstructTheActiveFormattingElements();
752 m_tree.insertFormattingElement(token);
// Plugin elements are tested against the parser content policy.
// NOTE(review): the statement taken when plugin content is not allowed is
// omitted from this excerpt.
755 if (token->name() == appletTag
756 || token->name() == embedTag
757 || token->name() == objectTag) {
758 if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
// applet/marquee/object: inserted, followed by a formatting-list marker.
761 if (token->name() == appletTag
762 || token->name() == marqueeTag
763 || token->name() == objectTag) {
764 m_tree.reconstructTheActiveFormattingElements();
765 m_tree.insertHTMLElement(token);
766 m_tree.activeFormattingElements()->appendMarker();
767 m_framesetOk = false;
// <table>: outside quirks mode a p in button scope is closed first; the mode
// becomes InTableMode.
770 if (token->name() == tableTag) {
771 if (!m_tree.inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
772 processFakeEndTag(pTag);
773 m_tree.insertHTMLElement(token);
774 m_framesetOk = false;
775 setInsertionMode(InTableMode);
// <image> is renamed to img and then handled by the test that follows.
778 if (token->name() == imageTag) {
780 // Apparently we're not supposed to ask.
781 token->setName(imgTag.localName());
782 // Note the fall through to the imgTag handling below!
// Elements inserted as self-closing.
784 if (token->name() == areaTag
785 || token->name() == brTag
786 || token->name() == embedTag
787 || token->name() == imgTag
788 || token->name() == keygenTag
789 || token->name() == wbrTag) {
790 m_tree.reconstructTheActiveFormattingElements();
791 m_tree.insertSelfClosingHTMLElement(token);
792 m_framesetOk = false;
// <input>: m_framesetOk is cleared unless the type attribute is "hidden"
// (compared case-insensitively).
795 if (token->name() == inputTag) {
796 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
797 m_tree.reconstructTheActiveFormattingElements();
798 m_tree.insertSelfClosingHTMLElement(token);
799 if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
800 m_framesetOk = false;
// param/source/track, and menuitem when the context-menu runtime feature is
// enabled: inserted as self-closing.
803 if ((RuntimeEnabledFeatures::contextMenuEnabled() && token->name() == menuitemTag)
804 || token->name() == paramTag
805 || token->name() == sourceTag
806 || token->name() == trackTag) {
807 m_tree.insertSelfClosingHTMLElement(token);
810 if (token->name() == hrTag) {
811 processFakePEndTagIfPInButtonScope();
812 m_tree.insertSelfClosingHTMLElement(token);
813 m_framesetOk = false;
// <textarea>: the tokenizer (when there is one) goes to RCDATAState, the
// current mode is saved in m_originalInsertionMode and the mode becomes
// TextMode.
816 if (token->name() == textareaTag) {
817 m_tree.insertHTMLElement(token);
818 m_shouldSkipLeadingNewline = true;
819 if (m_parser->tokenizer())
820 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
821 m_originalInsertionMode = m_insertionMode;
822 m_framesetOk = false;
823 setInsertionMode(TextMode);
// Elements handled by processGenericRawTextStartTag(): xmp and iframe, plus
// noembed/noscript depending on the plugin/script parser options.
826 if (token->name() == xmpTag) {
827 processFakePEndTagIfPInButtonScope();
828 m_tree.reconstructTheActiveFormattingElements();
829 m_framesetOk = false;
830 processGenericRawTextStartTag(token);
833 if (token->name() == iframeTag) {
834 m_framesetOk = false;
835 processGenericRawTextStartTag(token);
838 if (token->name() == noembedTag && m_options.pluginsEnabled) {
839 processGenericRawTextStartTag(token);
842 if (token->name() == noscriptTag && m_options.scriptEnabled) {
843 processGenericRawTextStartTag(token);
// <select>: the table-related modes listed below select InSelectInTableMode;
// InSelectMode is set on the line after that (the line joining the two is
// omitted from this excerpt).
846 if (token->name() == selectTag) {
847 m_tree.reconstructTheActiveFormattingElements();
848 m_tree.insertHTMLElement(token);
849 m_framesetOk = false;
850 if (m_insertionMode == InTableMode
851 || m_insertionMode == InCaptionMode
852 || m_insertionMode == InColumnGroupMode
853 || m_insertionMode == InTableBodyMode
854 || m_insertionMode == InRowMode
855 || m_insertionMode == InCellMode)
856 setInsertionMode(InSelectInTableMode);
858 setInsertionMode(InSelectMode);
// optgroup/option: an <option> that is the current node is closed first.
861 if (token->name() == optgroupTag || token->name() == optionTag) {
862 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
863 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
864 processEndTag(&endOption);
866 m_tree.reconstructTheActiveFormattingElements();
867 m_tree.insertHTMLElement(token);
// Ruby annotation elements: with a ruby element in scope, implied end tags are
// generated before the insertion.
// NOTE(review): the statements guarded by the hasTagName() tests are omitted
// from this excerpt.
870 if (token->name() == rbTag || token->name() == rtcTag) {
871 if (m_tree.openElements()->inScope(rubyTag.localName())) {
872 m_tree.generateImpliedEndTags();
873 if (!m_tree.currentStackItem()->hasTagName(rubyTag))
876 m_tree.insertHTMLElement(token);
879 if (token->name() == rtTag || token->name() == rpTag) {
880 if (m_tree.openElements()->inScope(rubyTag.localName())) {
881 m_tree.generateImpliedEndTagsWithExclusion(rtcTag.localName());
882 if (!m_tree.currentStackItem()->hasTagName(rubyTag) && !m_tree.currentStackItem()->hasTagName(rtcTag))
885 m_tree.insertHTMLElement(token);
// <math> and <svg>: attribute names are adjusted and the element is inserted
// in the MathML / SVG namespace.
888 if (token->name() == MathMLNames::mathTag.localName()) {
889 m_tree.reconstructTheActiveFormattingElements();
890 adjustMathMLAttributes(token);
891 adjustForeignAttributes(token);
892 m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
895 if (token->name() == SVGNames::svgTag.localName()) {
896 m_tree.reconstructTheActiveFormattingElements();
897 adjustSVGAttributes(token);
898 adjustForeignAttributes(token);
899 m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
// Table-structure tags, frame and head.
// NOTE(review): the body of this branch is omitted from this excerpt.
902 if (isCaptionColOrColgroupTag(token->name())
903 || token->name() == frameTag
904 || token->name() == headTag
905 || isTableBodyContextTag(token->name())
906 || isTableCellContextTag(token->name())
907 || token->name() == trTag) {
911 if (token->name() == templateTag) {
912 processTemplateStartTag(token);
// Handling for any tag not claimed above.
915 m_tree.reconstructTheActiveFormattingElements();
916 m_tree.insertHTMLElement(token);
// Handles a <template> start tag: appends a marker to the active formatting
// elements, inserts the element, pushes TemplateContentsMode onto the stack
// of template insertion modes and makes it the current insertion mode.
919 void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken* token)
921 m_tree.activeFormattingElements()->appendMarker();
922 m_tree.insertHTMLElement(token);
923 m_templateInsertionModes.append(TemplateContentsMode);
924 setInsertionMode(TemplateContentsMode);
// Handles a </template> end tag. The visible statements generate implied end
// tags, pop the open-element stack until a template element has been popped,
// clear the active formatting elements back to the last marker, drop the
// innermost template insertion mode and reset the insertion mode.
// NOTE(review): the return statements and the statement guarded by the
// current-node test are omitted from this excerpt, so the returned values
// cannot be stated from here.
927 bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken* token)
929 ASSERT(token->name() == templateTag.localName());
// With no template in HTML scope, the insertion-mode stack is expected to be
// empty, or to hold one entry when the fragment context element is itself a
// template.
930 if (!m_tree.openElements()->hasTemplateInHTMLScope()) {
931 ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && isHTMLTemplateElement(m_fragmentContext.contextElement())));
935 m_tree.generateImpliedEndTags();
936 if (!m_tree.currentStackItem()->hasTagName(templateTag))
938 m_tree.openElements()->popUntilPopped(templateTag);
939 m_tree.activeFormattingElements()->clearToLastMarker();
940 m_templateInsertionModes.removeLast();
941 resetInsertionModeAppropriately();
// End-of-file handling while in template contents: a synthesised </template>
// end tag is passed to processTemplateEndTag(), and the visible code then
// processes the end-of-file token again.
// NOTE(review): the statement taken when processTemplateEndTag() returns
// false and the return statements are omitted from this excerpt.
945 bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken* token)
947 AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag.localName());
948 if (!processTemplateEndTag(&endTemplate))
951 processEndOfFile(token);
// Handles an end of colgroup in InColumnGroupMode. When the current node is
// the root node or a template element (only expected for fragment/template
// parsing) the first branch is taken; otherwise the current node is popped
// and the mode becomes InTableMode.
// NOTE(review): the return statements are omitted from this excerpt, so the
// returned values cannot be stated from here.
955 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
957 if (m_tree.currentIsRootNode() || isHTMLTemplateElement(*m_tree.currentNode())) {
958 ASSERT(isParsingFragmentOrTemplateContents());
959 // FIXME: parse error
962 m_tree.openElements()->pop();
963 setInsertionMode(InTableMode);
967 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
968 HTMLStackItem* HTMLTreeBuilder::adjustedCurrentStackItem() const
970 ASSERT(!m_tree.isEmpty());
971 if (isParsingFragment() && m_tree.openElements()->hasOnlyOneElement())
972 return m_fragmentContext.contextElementStackItem();
974 return m_tree.currentStackItem();
// Closes the current table cell by processing a fake end tag: </td> when a
// td is in table scope, otherwise </th> (a th is then asserted to be in table
// scope). Must be called in InCellMode; after the </th> path the mode is
// asserted to be InRowMode.
// NOTE(review): the statement that ends the td branch is omitted from this
// excerpt.
977 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
978 void HTMLTreeBuilder::closeTheCell()
980 ASSERT(insertionMode() == InCellMode);
981 if (m_tree.openElements()->inTableScope(tdTag)) {
982 ASSERT(!m_tree.openElements()->inTableScope(thTag));
983 processFakeEndTag(tdTag);
986 ASSERT(m_tree.openElements()->inTableScope(thTag));
987 processFakeEndTag(thTag);
988 ASSERT(insertionMode() == InRowMode);
// Start-tag handling for the "in table" rules: a chain of tests on the token's
// tag name. The last two statements handle any tag that no earlier test
// claimed, by running the "in body" rules while a RedirectToFosterParentGuard
// is alive.
// NOTE(review): this excerpt omits short lines throughout the function (the
// statement that ends each branch, closing braces and other short statements),
// so the exact control flow between the branches cannot be confirmed from
// here.
991 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken* token)
993 ASSERT(token->type() == HTMLToken::StartTag);
// <caption>: pop back to the table scope marker, add a formatting-list
// marker, insert, and switch to InCaptionMode.
994 if (token->name() == captionTag) {
995 m_tree.openElements()->popUntilTableScopeMarker();
996 m_tree.activeFormattingElements()->appendMarker();
997 m_tree.insertHTMLElement(token);
998 setInsertionMode(InCaptionMode);
// <colgroup>: pop back to the table scope marker, insert, and switch to
// InColumnGroupMode.
1001 if (token->name() == colgroupTag) {
1002 m_tree.openElements()->popUntilTableScopeMarker();
1003 m_tree.insertHTMLElement(token);
1004 setInsertionMode(InColumnGroupMode);
// <col>: a fake <colgroup> start tag is processed first (the colgroup branch
// above switches to InColumnGroupMode), then the token is processed again.
// The assert compares the actual insertion mode; asserting the bare
// enumerator would not check anything.
1007 if (token->name() == colTag) {
1008 processFakeStartTag(colgroupTag);
1009 ASSERT(insertionMode() == InColumnGroupMode);
1010 processStartTag(token);
// tbody/tfoot/thead: pop back to the table scope marker, insert, and switch
// to InTableBodyMode.
1013 if (isTableBodyContextTag(token->name())) {
1014 m_tree.openElements()->popUntilTableScopeMarker();
1015 m_tree.insertHTMLElement(token);
1016 setInsertionMode(InTableBodyMode);
// td/th/tr: a fake <tbody> start tag is processed first, then the token is
// processed again in InTableBodyMode.
1019 if (isTableCellContextTag(token->name())
1020 || token->name() == trTag) {
1021 processFakeStartTag(tbodyTag);
1022 ASSERT(insertionMode() == InTableBodyMode);
1023 processStartTag(token);
// Nested <table>: the current table is ended first; if that fails (only
// expected for fragment/template parsing) the first inner branch is taken,
// otherwise the token is processed again.
1026 if (token->name() == tableTag) {
1028 if (!processTableEndTagForInTable()) {
1029 ASSERT(isParsingFragmentOrTemplateContents());
1032 processStartTag(token);
// <style>/<script> are handled by the "in head" rules.
1035 if (token->name() == styleTag || token->name() == scriptTag) {
1036 processStartTagForInHead(token);
// <input>: only an input whose type attribute is "hidden" (compared
// case-insensitively) is inserted here.
1039 if (token->name() == inputTag) {
1040 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
1041 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1043 m_tree.insertSelfClosingHTMLElement(token);
1046 // Fall through to "anything else" case.
// <form>: the form element is inserted (second argument true) and popped
// straight away. NOTE(review): the first lines of this branch are omitted.
1048 if (token->name() == formTag) {
1052 m_tree.insertHTMLFormElement(token, true);
1053 m_tree.openElements()->pop();
1056 if (token->name() == templateTag) {
1057 processTemplateStartTag(token);
// Handling for any tag not claimed above: the "in body" rules run while the
// guard object is in scope.
1061 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1062 processStartTagForInBody(token);
// Routes a start-tag token to the handling for the current insertion mode.
// The token must be a StartTag (asserted). Depending on the mode, a branch
// inserts an element through m_tree, changes the insertion mode, delegates to
// a per-mode helper (processStartTagForInHead / InBody / InTable), or switches
// mode and feeds the same token back into processStartTag().
// NOTE(review): some case labels, braces and return/break lines are not
// visible in this view of the file; the comments below describe only the
// statements that are shown.
1065 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token)
1067 ASSERT(token->type() == HTMLToken::StartTag);
1068 switch (insertionMode()) {
// Initial mode: run the initial-mode default handling.
1070 ASSERT(insertionMode() == InitialMode);
1071 defaultForInitial();
// Before <html>: an <html> tag is inserted and moves the builder to
// BeforeHeadMode; defaultForBeforeHTML() is the other visible path.
1073 case BeforeHTMLMode:
1074 ASSERT(insertionMode() == BeforeHTMLMode);
1075 if (token->name() == htmlTag) {
1076 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1077 setInsertionMode(BeforeHeadMode);
1080 defaultForBeforeHTML();
// Before <head>: <html> goes through the in-body <html> handling; <head> is
// inserted and moves the builder to InHeadMode.
1082 case BeforeHeadMode:
1083 ASSERT(insertionMode() == BeforeHeadMode);
1084 if (token->name() == htmlTag) {
1085 processHtmlStartTagForInBody(token);
1088 if (token->name() == headTag) {
1089 m_tree.insertHTMLHeadElement(token);
1090 setInsertionMode(InHeadMode);
1093 defaultForBeforeHead();
// In <head>: try the in-head start-tag rules first; the helper's bool result
// is tested here.
1096 ASSERT(insertionMode() == InHeadMode);
1097 if (processStartTagForInHead(token))
// After </head>: <body> clears m_framesetOk, inserts the body element and
// enters InBodyMode; <frameset> is inserted and enters InFramesetMode.
1102 ASSERT(insertionMode() == AfterHeadMode);
1103 if (token->name() == htmlTag) {
1104 processHtmlStartTagForInBody(token);
1107 if (token->name() == bodyTag) {
1108 m_framesetOk = false;
1109 m_tree.insertHTMLBodyElement(token);
1110 setInsertionMode(InBodyMode);
1113 if (token->name() == framesetTag) {
1114 m_tree.insertHTMLElement(token);
1115 setInsertionMode(InFramesetMode);
// Head-type content seen after </head>: the head element is pushed back onto
// the open-element stack, the token is processed with the in-head rules, and
// the head element is removed from the stack again.
1118 if (token->name() == baseTag
1119 || token->name() == basefontTag
1120 || token->name() == bgsoundTag
1121 || token->name() == linkTag
1122 || token->name() == metaTag
1123 || token->name() == noframesTag
1124 || token->name() == scriptTag
1125 || token->name() == styleTag
1126 || token->name() == templateTag
1127 || token->name() == titleTag) {
1129 ASSERT(m_tree.head());
1130 m_tree.openElements()->pushHTMLHeadElement(m_tree.headStackItem());
1131 processStartTagForInHead(token);
1132 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1135 if (token->name() == headTag) {
1139 defaultForAfterHead();
// In body / in table: delegate to the dedicated per-mode helpers.
1142 ASSERT(insertionMode() == InBodyMode);
1143 processStartTagForInBody(token);
1146 ASSERT(insertionMode() == InTableMode);
1147 processStartTagForInTable(token);
// In <caption>: table-structure tags first close the caption via
// processCaptionEndTagForInCaption() and the token is then reprocessed; a
// failed close is asserted to happen only when parsing a fragment.
1150 ASSERT(insertionMode() == InCaptionMode);
1151 if (isCaptionColOrColgroupTag(token->name())
1152 || isTableBodyContextTag(token->name())
1153 || isTableCellContextTag(token->name())
1154 || token->name() == trTag) {
1156 if (!processCaptionEndTagForInCaption()) {
1157 ASSERT(isParsingFragment());
1160 processStartTag(token);
1163 processStartTagForInBody(token);
// In <colgroup>: <col> is inserted as a self-closing element and <template>
// goes to processTemplateStartTag(). The remaining statements close the
// column group and reprocess the token.
1165 case InColumnGroupMode:
1166 ASSERT(insertionMode() == InColumnGroupMode);
1167 if (token->name() == htmlTag) {
1168 processHtmlStartTagForInBody(token);
1171 if (token->name() == colTag) {
1172 m_tree.insertSelfClosingHTMLElement(token);
1175 if (token->name() == templateTag) {
1176 processTemplateStartTag(token);
1179 if (!processColgroupEndTagForInColumnGroup()) {
1180 ASSERT(isParsingFragmentOrTemplateContents());
1183 processStartTag(token);
// In table body: <tr> is inserted after popping to the table-body scope
// marker and enters InRowMode. A cell tag gets a fake <tr> start tag first
// and is then reprocessed in InRowMode.
1185 case InTableBodyMode:
1186 ASSERT(insertionMode() == InTableBodyMode);
1187 if (token->name() == trTag) {
1188 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1189 m_tree.insertHTMLElement(token);
1190 setInsertionMode(InRowMode);
1193 if (isTableCellContextTag(token->name())) {
1195 processFakeStartTag(trTag);
1196 ASSERT(insertionMode() == InRowMode);
1197 processStartTag(token);
// Caption/col/colgroup/table-body tags: having no tbody/thead/tfoot in table
// scope is asserted to occur only for fragment or template-contents parsing.
// The visible close path pops to the table-body marker, fakes an end tag for
// the current item and reprocesses the token.
1200 if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) {
1201 // FIXME: This is slow.
1202 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1203 ASSERT(isParsingFragmentOrTemplateContents());
1207 m_tree.openElements()->popUntilTableBodyScopeMarker();
1208 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1209 processFakeEndTag(m_tree.currentStackItem()->localName());
1210 processStartTag(token);
1213 processStartTagForInTable(token);
// In row: a cell tag pops to the row scope marker, is inserted, enters
// InCellMode and appends a marker to the active formatting elements.
1216 ASSERT(insertionMode() == InRowMode);
1217 if (isTableCellContextTag(token->name())) {
1218 m_tree.openElements()->popUntilTableRowScopeMarker();
1219 m_tree.insertHTMLElement(token);
1220 setInsertionMode(InCellMode);
1221 m_tree.activeFormattingElements()->appendMarker();
// Row/section-level tags close the current row via processTrEndTagForInRow()
// and are reprocessed in InTableBodyMode (asserted).
1224 if (token->name() == trTag
1225 || isCaptionColOrColgroupTag(token->name())
1226 || isTableBodyContextTag(token->name())) {
1227 if (!processTrEndTagForInRow()) {
1228 ASSERT(isParsingFragmentOrTemplateContents());
1231 ASSERT(insertionMode() == InTableBodyMode);
1232 processStartTag(token);
1235 processStartTagForInTable(token);
// In cell: table-structure tags are checked against td/th in table scope
// before the token is reprocessed; anything else uses the in-body rules.
// NOTE(review): the statement(s) that close the cell are not visible here.
1238 ASSERT(insertionMode() == InCellMode);
1239 if (isCaptionColOrColgroupTag(token->name())
1240 || isTableCellContextTag(token->name())
1241 || token->name() == trTag
1242 || isTableBodyContextTag(token->name())) {
1243 // FIXME: This could be more efficient.
1244 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1245 ASSERT(isParsingFragment());
1250 processStartTag(token);
1253 processStartTagForInBody(token);
// After body / after-after body (per the assert): <html> uses the in-body
// <html> handling; the remaining statements switch back to InBodyMode and
// reprocess the token.
1256 case AfterAfterBodyMode:
1257 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1258 if (token->name() == htmlTag) {
1259 processHtmlStartTagForInBody(token);
1262 setInsertionMode(InBodyMode);
1263 processStartTag(token);
// In <noscript> inside <head>: the listed head-type tags are processed with
// the in-head rules, which are asserted to have handled the token.
1265 case InHeadNoscriptMode:
1266 ASSERT(insertionMode() == InHeadNoscriptMode);
1267 if (token->name() == htmlTag) {
1268 processHtmlStartTagForInBody(token);
1271 if (token->name() == basefontTag
1272 || token->name() == bgsoundTag
1273 || token->name() == linkTag
1274 || token->name() == metaTag
1275 || token->name() == noframesTag
1276 || token->name() == styleTag) {
1277 bool didProcess = processStartTagForInHead(token);
1278 ASSERT_UNUSED(didProcess, didProcess);
1281 if (token->name() == htmlTag || token->name() == noscriptTag) {
1285 defaultForInHeadNoscript();
1286 processToken(token);
// In <frameset>: nested <frameset> is inserted, <frame> is inserted as a
// self-closing element, <noframes> uses the in-head rules and <template>
// goes to processTemplateStartTag().
1288 case InFramesetMode:
1289 ASSERT(insertionMode() == InFramesetMode);
1290 if (token->name() == htmlTag) {
1291 processHtmlStartTagForInBody(token);
1294 if (token->name() == framesetTag) {
1295 m_tree.insertHTMLElement(token);
1298 if (token->name() == frameTag) {
1299 m_tree.insertSelfClosingHTMLElement(token);
1302 if (token->name() == noframesTag) {
1303 processStartTagForInHead(token);
1306 if (token->name() == templateTag) {
1307 processTemplateStartTag(token);
// After frameset / after-after frameset share one branch: only <html> and
// <noframes> have visible handling.
1312 case AfterFramesetMode:
1313 case AfterAfterFramesetMode:
1314 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1315 if (token->name() == htmlTag) {
1316 processHtmlStartTagForInBody(token);
1319 if (token->name() == noframesTag) {
1320 processStartTagForInHead(token);
// In <select> inside a table: table-structure tags synthesize a </select>
// end tag, process it, and then reprocess the original token.
1325 case InSelectInTableMode:
1326 ASSERT(insertionMode() == InSelectInTableMode);
1327 if (token->name() == captionTag
1328 || token->name() == tableTag
1329 || isTableBodyContextTag(token->name())
1330 || token->name() == trTag
1331 || isTableCellContextTag(token->name())) {
1333 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1334 processEndTag(&endSelect);
1335 processStartTag(token);
// In <select> (the assert also admits InSelectInTableMode).
1340 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1341 if (token->name() == htmlTag) {
1342 processHtmlStartTagForInBody(token);
// <option>: an open <option> on top of the stack is closed with a synthesized
// end tag before the new one is inserted.
1345 if (token->name() == optionTag) {
1346 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1347 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1348 processEndTag(&endOption);
1350 m_tree.insertHTMLElement(token);
// <optgroup>: an open <option> and then an open <optgroup> on top of the
// stack are each closed with a synthesized end tag before insertion.
1353 if (token->name() == optgroupTag) {
1354 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1355 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1356 processEndTag(&endOption);
1358 if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
1359 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1360 processEndTag(&endOptgroup);
1362 m_tree.insertHTMLElement(token);
// A nested <select> start tag is handled by processing a synthesized
// </select> end tag.
1365 if (token->name() == selectTag) {
1367 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1368 processEndTag(&endSelect);
// <input>, <keygen>, <textarea>: when a <select> is in select scope, it is
// closed with a synthesized end tag and the token is reprocessed; the
// not-in-scope case is asserted to be fragment parsing.
1371 if (token->name() == inputTag
1372 || token->name() == keygenTag
1373 || token->name() == textareaTag) {
1375 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1376 ASSERT(isParsingFragment());
1379 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1380 processEndTag(&endSelect);
1381 processStartTag(token);
1384 if (token->name() == scriptTag) {
1385 bool didProcess = processStartTagForInHead(token);
1386 ASSERT_UNUSED(didProcess, didProcess);
1389 if (token->name() == templateTag) {
1390 processTemplateStartTag(token);
// In table text: run the table-text default handling, then reprocess the
// token.
1394 case InTableTextMode:
1395 defaultForInTableText();
1396 processStartTag(token);
// NOTE(review): the case label guarding this assertion is not visible here.
1399 ASSERT_NOT_REACHED();
// Template contents: <template> and the listed head-type tags are handled
// directly. For any other tag an insertion mode is chosen from the tag name,
// stored as the last entry of m_templateInsertionModes (asserted to have
// been TemplateContentsMode), made current, and the token is reprocessed.
1401 case TemplateContentsMode:
1402 if (token->name() == templateTag) {
1403 processTemplateStartTag(token);
1407 if (token->name() == linkTag
1408 || token->name() == scriptTag
1409 || token->name() == styleTag
1410 || token->name() == metaTag) {
1411 processStartTagForInHead(token);
1415 InsertionMode insertionMode = TemplateContentsMode;
1416 if (token->name() == frameTag)
1417 insertionMode = InFramesetMode;
1418 else if (token->name() == colTag)
1419 insertionMode = InColumnGroupMode;
1420 else if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name()))
1421 insertionMode = InTableMode;
1422 else if (token->name() == trTag)
1423 insertionMode = InTableBodyMode;
1424 else if (isTableCellContextTag(token->name()))
1425 insertionMode = InRowMode;
1427 insertionMode = InBodyMode;
1429 ASSERT(insertionMode != TemplateContentsMode);
1430 ASSERT(m_templateInsertionModes.last() == TemplateContentsMode);
1431 m_templateInsertionModes.last() = insertionMode;
1432 setInsertionMode(insertionMode);
1434 processStartTag(token);
// Handles an <html> start tag with the in-body rules. The visible code checks
// whether a <template> is in HTML scope (asserting that template contents are
// being parsed in that case) and calls m_tree.insertHTMLHtmlStartTagInBody().
// NOTE(review): the lines between the template check and the insertion are
// not visible in this view of the file.
1439 void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken* token)
1442 if (m_tree.openElements()->hasTemplateInHTMLScope()) {
1443 ASSERT(isParsingTemplateContents());
1446 m_tree.insertHTMLHtmlStartTagInBody(token);
// Handles a </body> end tag with the in-body rules. The token is asserted to
// be an EndTag named "body". The code tests whether a body element is in
// scope and moves the builder to AfterBodyMode.
// Returns a bool; processEndTagForInBody() only reprocesses </html> when this
// returns true. NOTE(review): the return statements themselves are not
// visible in this view of the file.
1449 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken* token)
1451 ASSERT(token->type() == HTMLToken::EndTag);
1452 ASSERT(token->name() == bodyTag);
1453 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1457 notImplemented(); // Emit a more specific parse error based on stack contents.
1458 setInsertionMode(AfterBodyMode);
// Generic in-body handling for an end tag. Starts at the top record of the
// open-element stack and advances with record->next().
// NOTE(review): the loop header and the early exits are not visible in this
// view of the file.
1462 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token)
1464 ASSERT(token->type() == HTMLToken::EndTag);
1465 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1467 RefPtrWillBeRawPtr<HTMLStackItem> item = record->stackItem();
// Matching element found: generate implied end tags except for this tag
// name, then pop the stack until the matched element has been popped.
1468 if (item->matchesHTMLTag(token->name())) {
1469 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1470 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1472 m_tree.openElements()->popUntilPopped(item->element());
// Special nodes are tested separately from ordinary stack entries.
1475 if (item->isSpecialNode()) {
1479 record = record->next();
1483 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
// Runs the adoption agency algorithm for an end tag that names a formatting
// element. The outer loop runs at most outerIterationLimit (8) times and the
// inner loop at most innerIterationLimit (3) times. The step numbers in the
// existing comments refer to the algorithm linked below.
// NOTE(review): several early-exit lines (return/break/continue) and brace
// lines are not visible in this view of the file.
1484 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token)
1486 // The adoption agency algorithm is N^2. We limit the number of iterations
1487 // to stop from hanging the whole browser. This limit is specified in the
1488 // adoption agency algorithm:
1489 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1490 static const int outerIterationLimit = 8;
1491 static const int innerIterationLimit = 3;
1493 // 1, 2, 3 and 16 are covered by the for() loop.
1494 for (int i = 0; i < outerIterationLimit; ++i) {
// Look up the closest in-scope active formatting element with the token's
// name; without one, fall back to the generic end-tag handling.
1496 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token->name());
1498 if (!formattingElement)
1499 return processAnyOtherEndTagForInBody(token);
// The formatting element is on the open-element stack but not in scope.
1501 if ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement)) {
1503 notImplemented(); // Check the stack of open elements for a more specific parse error.
// The formatting element is not on the open-element stack at all: drop it
// from the list of active formatting elements.
1507 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1508 if (!formattingElementRecord) {
1510 m_tree.activeFormattingElements()->remove(formattingElement);
1514 if (formattingElement != m_tree.currentElement())
// Without a furthest block, pop the stack through the formatting element and
// remove it from the active formatting list.
1517 HTMLElementStack::ElementRecord* furthestBlock = m_tree.openElements()->furthestBlockForFormattingElement(formattingElement);
1519 if (!furthestBlock) {
1520 m_tree.openElements()->popUntilPopped(formattingElement);
1521 m_tree.activeFormattingElements()->remove(formattingElement);
// The common ancestor is the stack item of the record that follows the
// formatting element's record via next(). The bookmark records the formatting
// element's position in the active formatting list.
1525 ASSERT(furthestBlock->isAbove(formattingElementRecord));
1526 RefPtrWillBeRawPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem();
1528 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1530 HTMLElementStack::ElementRecord* node = furthestBlock;
1531 HTMLElementStack::ElementRecord* nextNode = node->next();
1532 HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1533 // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
1534 for (int i = 0; i < innerIterationLimit; ++i) {
1538 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5.
// Nodes that are not active formatting elements are removed from the
// open-element stack.
1540 if (!m_tree.activeFormattingElements()->contains(node->element())) {
1541 m_tree.openElements()->remove(node->element());
1546 if (node == formattingElementRecord)
// Build a replacement element from the node's saved token and install it in
// both the active formatting list entry and the stack record.
1549 RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get());
1551 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1552 nodeEntry->replaceElement(newItem);
1553 node->replaceElement(newItem.release());
// While lastNode is still the furthest block, move the bookmark to just after
// this node's entry.
1556 if (lastNode == furthestBlock)
1557 bookmark.moveToAfter(nodeEntry);
1559 m_tree.reparent(node, lastNode);
// Attach lastNode under the common ancestor.
1564 m_tree.insertAlreadyParsedChild(commonAncestor.get(), lastNode);
// Create a new element from the formatting element's saved token, move all of
// the furthest block's children into it, and reparent it under the furthest
// block.
1566 RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get());
1568 m_tree.takeAllChildren(newItem.get(), furthestBlock);
1570 m_tree.reparent(furthestBlock, newItem.get());
// Swap the new item in for the old formatting element in the active
// formatting list at the bookmark. On the open-element stack, remove the old
// element and re-insert the new item via insertAbove() relative to the
// furthest block.
1572 m_tree.activeFormattingElements()->swapTo(formattingElement, newItem, bookmark);
1574 m_tree.openElements()->remove(formattingElement);
1575 m_tree.openElements()->insertAbove(newItem, furthestBlock);
// Recomputes the insertion mode from the stack of open elements, following
// the "reset the insertion mode appropriately" steps linked below. The walk
// starts at the top record and advances with next(); the first stack item
// whose tag name matches one of the checks decides the mode.
// NOTE(review): the loop header and some brace/flag lines are not visible in
// this view of the file.
1579 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1581 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1583 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1585 RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
// At the root of the stack, fragment parsing substitutes the fragment's
// context element item for the stack item.
1586 if (item->node() == m_tree.openElements()->rootNode()) {
1588 if (isParsingFragment())
1589 item = m_fragmentContext.contextElementStackItem();
// <template>: use the last recorded template insertion mode.
1591 if (item->hasTagName(templateTag))
1592 return setInsertionMode(m_templateInsertionModes.last());
// <select>: keep walking the stack (stopping at the root or a <template>);
// a <table> found on the way selects InSelectInTableMode, otherwise
// InSelectMode is used.
1593 if (item->hasTagName(selectTag)) {
1595 while (item->node() != m_tree.openElements()->rootNode() && !item->hasTagName(templateTag)) {
1596 nodeRecord = nodeRecord->next();
1597 item = nodeRecord->stackItem();
1598 if (item->hasTagName(tableTag))
1599 return setInsertionMode(InSelectInTableMode);
1602 return setInsertionMode(InSelectMode);
// Table-related elements map directly to their insertion modes.
1604 if (item->hasTagName(tdTag) || item->hasTagName(thTag))
1605 return setInsertionMode(InCellMode);
1606 if (item->hasTagName(trTag))
1607 return setInsertionMode(InRowMode);
1608 if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag))
1609 return setInsertionMode(InTableBodyMode);
1610 if (item->hasTagName(captionTag))
1611 return setInsertionMode(InCaptionMode);
1612 if (item->hasTagName(colgroupTag)) {
1613 return setInsertionMode(InColumnGroupMode);
1615 if (item->hasTagName(tableTag))
1616 return setInsertionMode(InTableMode);
// <head>: InHeadMode, unless this <head> is the fragment's context element,
// in which case InBodyMode is used.
1617 if (item->hasTagName(headTag)) {
1618 if (!m_fragmentContext.fragment() || m_fragmentContext.contextElement() != item->node())
1619 return setInsertionMode(InHeadMode);
1620 return setInsertionMode(InBodyMode);
1622 if (item->hasTagName(bodyTag))
1623 return setInsertionMode(InBodyMode);
1624 if (item->hasTagName(framesetTag)) {
1625 return setInsertionMode(InFramesetMode);
// <html>: AfterHeadMode when a head stack item exists; otherwise
// BeforeHeadMode, which is asserted to occur only when parsing a fragment.
1627 if (item->hasTagName(htmlTag)) {
1628 if (m_tree.headStackItem())
1629 return setInsertionMode(AfterHeadMode);
1631 ASSERT(isParsingFragment());
1632 return setInsertionMode(BeforeHeadMode);
// Fallback to InBodyMode, asserted to occur only when parsing a fragment.
1635 ASSERT(isParsingFragment());
1636 return setInsertionMode(InBodyMode);
1638 nodeRecord = nodeRecord->next();
// Handles an end tag while in the table-body insertion mode. The token must
// be an EndTag (asserted).
// NOTE(review): the bodies of the early-exit branches (brace/return lines)
// are not visible in this view of the file.
1642 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken* token)
1644 ASSERT(token->type() == HTMLToken::EndTag);
// </tbody>, </thead>, </tfoot>: after the table-scope check, pop to the
// table-body scope marker, pop the section itself and return to InTableMode.
1645 if (isTableBodyContextTag(token->name())) {
1646 if (!m_tree.openElements()->inTableScope(token->name())) {
1650 m_tree.openElements()->popUntilTableBodyScopeMarker();
1651 m_tree.openElements()->pop();
1652 setInsertionMode(InTableMode);
// </table>: having no table section in table scope is asserted to occur only
// for fragment or template-contents parsing. The visible close path pops to
// the table-body marker, fakes an end tag for the current section and
// reprocesses the token.
1655 if (token->name() == tableTag) {
1656 // FIXME: This is slow.
1657 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1658 ASSERT(isParsingFragmentOrTemplateContents());
1662 m_tree.openElements()->popUntilTableBodyScopeMarker();
1663 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1664 processFakeEndTag(m_tree.currentStackItem()->localName());
1665 processEndTag(token);
// These end tags get their own branch, whose body is not visible here.
1668 if (token->name() == bodyTag
1669 || isCaptionColOrColgroupTag(token->name())
1670 || token->name() == htmlTag
1671 || isTableCellContextTag(token->name())
1672 || token->name() == trTag) {
// Remaining visible path: the in-table end-tag rules.
1676 processEndTagForInTable(token);
// Handles an end tag while in the row insertion mode. The token must be an
// EndTag (asserted).
// NOTE(review): the bodies of the early-exit branches (brace/return lines)
// are not visible in this view of the file.
1679 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken* token)
1681 ASSERT(token->type() == HTMLToken::EndTag);
// </tr>: close the current row.
1682 if (token->name() == trTag) {
1683 processTrEndTagForInRow();
// </table>: close the row first; a failed close is asserted to happen only
// for fragment or template-contents parsing. The token is then reprocessed in
// InTableBodyMode (asserted).
1686 if (token->name() == tableTag) {
1687 if (!processTrEndTagForInRow()) {
1688 ASSERT(isParsingFragmentOrTemplateContents());
1691 ASSERT(insertionMode() == InTableBodyMode);
1692 processEndTag(token);
// </tbody>, </thead>, </tfoot>: after the table-scope check, a fake </tr> is
// processed and the token is reprocessed in InTableBodyMode (asserted).
1695 if (isTableBodyContextTag(token->name())) {
1696 if (!m_tree.openElements()->inTableScope(token->name())) {
1700 processFakeEndTag(trTag);
1701 ASSERT(insertionMode() == InTableBodyMode);
1702 processEndTag(token);
// These end tags get their own branch, whose body is not visible here.
1705 if (token->name() == bodyTag
1706 || isCaptionColOrColgroupTag(token->name())
1707 || token->name() == htmlTag
1708 || isTableCellContextTag(token->name())) {
// Remaining visible path: the in-table end-tag rules.
1712 processEndTagForInTable(token);
// Handles an end tag while in the cell insertion mode. The token must be an
// EndTag (asserted).
// NOTE(review): the bodies of the early-exit branches and the statement(s)
// that close the cell before reprocessing are not visible in this view of the
// file.
1715 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken* token)
1717 ASSERT(token->type() == HTMLToken::EndTag);
// </td>, </th>: after the table-scope check, generate implied end tags, pop
// the stack through the cell, clear the active formatting elements back to
// the last marker and return to InRowMode.
1718 if (isTableCellContextTag(token->name())) {
1719 if (!m_tree.openElements()->inTableScope(token->name())) {
1723 m_tree.generateImpliedEndTags();
1724 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1726 m_tree.openElements()->popUntilPopped(token->name());
1727 m_tree.activeFormattingElements()->clearToLastMarker();
1728 setInsertionMode(InRowMode);
// These end tags get their own branch, whose body is not visible here.
1731 if (token->name() == bodyTag
1732 || isCaptionColOrColgroupTag(token->name())
1733 || token->name() == htmlTag) {
// </table>, </tr> and table-section end tags: when the named element is not
// in table scope, the assert lists the situations in which that is expected.
// The visible follow-up reprocesses the token.
1737 if (token->name() == tableTag
1738 || token->name() == trTag
1739 || isTableBodyContextTag(token->name())) {
1740 if (!m_tree.openElements()->inTableScope(token->name())) {
1741 ASSERT(isTableBodyContextTag(token->name()) || m_tree.openElements()->inTableScope(templateTag) || isParsingFragment());
1746 processEndTag(token);
// Remaining visible path: the in-body end-tag rules.
1749 processEndTagForInBody(token);
// Handles an end tag with the in-body rules. The token must be an EndTag
// (asserted). Each group of tag names below has its own scope check and its
// own way of closing the element; anything unmatched reaches
// processAnyOtherEndTagForInBody().
// NOTE(review): the bodies of the failed-scope branches and the mismatch
// branches (brace/return/parse-error lines) are not visible in this view of
// the file.
1752 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token)
1754 ASSERT(token->type() == HTMLToken::EndTag);
1755 if (token->name() == bodyTag) {
1756 processBodyEndTagForInBody(token);
// </html>: process a synthesized </body> first and reprocess the </html>
// token only if that succeeded.
1759 if (token->name() == htmlTag) {
1760 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1761 if (processBodyEndTagForInBody(&endBody))
1762 processEndTag(token);
// Block-level end tags: checked with inScope(), then implied end tags are
// generated and the stack is popped through the named element.
1765 if (token->name() == addressTag
1766 || token->name() == articleTag
1767 || token->name() == asideTag
1768 || token->name() == blockquoteTag
1769 || token->name() == buttonTag
1770 || token->name() == centerTag
1771 || token->name() == detailsTag
1772 || token->name() == dirTag
1773 || token->name() == divTag
1774 || token->name() == dlTag
1775 || token->name() == fieldsetTag
1776 || token->name() == figcaptionTag
1777 || token->name() == figureTag
1778 || token->name() == footerTag
1779 || token->name() == headerTag
1780 || token->name() == hgroupTag
1781 || token->name() == listingTag
1782 || token->name() == mainTag
1783 || token->name() == menuTag
1784 || token->name() == navTag
1785 || token->name() == olTag
1786 || token->name() == preTag
1787 || token->name() == sectionTag
1788 || token->name() == summaryTag
1789 || token->name() == ulTag) {
1790 if (!m_tree.openElements()->inScope(token->name())) {
1794 m_tree.generateImpliedEndTags();
1795 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1797 m_tree.openElements()->popUntilPopped(token->name());
// </form>: take the form element from m_tree via takeForm(), check that it
// exists and is in scope, generate implied end tags, then remove that
// specific element from the stack (remove(), not a pop-until).
1800 if (token->name() == formTag) {
1801 RefPtrWillBeRawPtr<Element> node = m_tree.takeForm();
1802 if (!node || !m_tree.openElements()->inScope(node.get())) {
1806 m_tree.generateImpliedEndTags();
1807 if (m_tree.currentElement() != node.get())
1809 m_tree.openElements()->remove(node.get());
// </p>: checked with inButtonScope(). When no <p> is in button scope, a fake
// <p> start tag is processed and the end tag is reprocessed. The normal close
// generates implied end tags except for <p>, then pops through the <p>.
1811 if (token->name() == pTag) {
1812 if (!m_tree.openElements()->inButtonScope(token->name())) {
1814 processFakeStartTag(pTag);
1815 ASSERT(m_tree.openElements()->inScope(token->name()));
1816 processEndTag(token);
1819 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1820 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1822 m_tree.openElements()->popUntilPopped(token->name());
// </li>: checked with inListItemScope().
1825 if (token->name() == liTag) {
1826 if (!m_tree.openElements()->inListItemScope(token->name())) {
1830 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1831 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1833 m_tree.openElements()->popUntilPopped(token->name());
// </dd>, </dt>: checked with inScope().
1836 if (token->name() == ddTag
1837 || token->name() == dtTag) {
1838 if (!m_tree.openElements()->inScope(token->name())) {
1842 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1843 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1845 m_tree.openElements()->popUntilPopped(token->name());
// Numbered header end tags: any numbered header element in scope satisfies
// the check, and the stack is popped until a numbered header element has
// been popped.
1848 if (isNumberedHeaderTag(token->name())) {
1849 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1853 m_tree.generateImpliedEndTags();
1854 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1856 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
// Formatting end tags go through the adoption agency algorithm.
1859 if (isFormattingTag(token->name())) {
1860 callTheAdoptionAgency(token);
// </applet>, </marquee>, </object>: closed like block elements, and the
// active formatting elements are cleared back to the last marker.
1863 if (token->name() == appletTag
1864 || token->name() == marqueeTag
1865 || token->name() == objectTag) {
1866 if (!m_tree.openElements()->inScope(token->name())) {
1870 m_tree.generateImpliedEndTags();
1871 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1873 m_tree.openElements()->popUntilPopped(token->name());
1874 m_tree.activeFormattingElements()->clearToLastMarker();
// </br> is handled by processing a fake <br> start tag.
1877 if (token->name() == brTag) {
1879 processFakeStartTag(brTag);
1882 if (token->name() == templateTag) {
1883 processTemplateEndTag(token);
// Remaining visible path: the generic end-tag handling.
1886 processAnyOtherEndTagForInBody(token);
// Closes the current <caption>. When no caption is in table scope, the
// function asserts that a fragment is being parsed. The close path generates
// implied end tags, pops the stack through the caption, clears the active
// formatting elements back to the last marker and returns to InTableMode.
// Returns a bool that callers test with '!' before asserting fragment
// parsing. NOTE(review): the return statements are not visible in this view
// of the file; presumably false means the caption was not in table scope.
1889 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1891 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1892 ASSERT(isParsingFragment());
1893 // FIXME: parse error
1896 m_tree.generateImpliedEndTags();
1897 // FIXME: parse error if (!m_tree.currentStackItem()->hasTagName(captionTag))
1898 m_tree.openElements()->popUntilPopped(captionTag.localName());
1899 m_tree.activeFormattingElements()->clearToLastMarker();
1900 setInsertionMode(InTableMode);
// Closes the current <tr>. When no <tr> is in table scope, the function
// asserts fragment or template-contents parsing. The close path pops to the
// table-row scope marker, asserts that the current item is the <tr>, pops it
// and returns to InTableBodyMode.
// Returns a bool that callers test with '!' before asserting fragment or
// template-contents parsing. NOTE(review): the return statements are not
// visible in this view of the file; presumably false means no <tr> was in
// table scope.
1904 bool HTMLTreeBuilder::processTrEndTagForInRow()
1906 if (!m_tree.openElements()->inTableScope(trTag)) {
1907 ASSERT(isParsingFragmentOrTemplateContents());
1908 // FIXME: parse error
1911 m_tree.openElements()->popUntilTableRowScopeMarker();
1912 ASSERT(m_tree.currentStackItem()->hasTagName(trTag));
1913 m_tree.openElements()->pop();
1914 setInsertionMode(InTableBodyMode);
// Closes the current <table>. When no table is in table scope, the function
// asserts fragment or template-contents parsing. The close path pops the
// stack through the table and recomputes the insertion mode from the
// remaining stack.
// Returns a bool. NOTE(review): the return statements are not visible in this
// view of the file.
1918 bool HTMLTreeBuilder::processTableEndTagForInTable()
1920 if (!m_tree.openElements()->inTableScope(tableTag)) {
1921 ASSERT(isParsingFragmentOrTemplateContents());
1922 // FIXME: parse error.
1925 m_tree.openElements()->popUntilPopped(tableTag.localName());
1926 resetInsertionModeAppropriately();
// Handles an end tag while in the table insertion mode. The token must be an
// EndTag (asserted). </table> closes the table. The listed structural end
// tags get their own branch, whose body is not visible in this view of the
// file. The remaining visible path applies the in-body end-tag rules while a
// RedirectToFosterParentGuard on m_tree is alive.
1930 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken* token)
1932 ASSERT(token->type() == HTMLToken::EndTag);
1933 if (token->name() == tableTag) {
1934 processTableEndTagForInTable();
1937 if (token->name() == bodyTag
1938 || isCaptionColOrColgroupTag(token->name())
1939 || token->name() == htmlTag
1940 || isTableBodyContextTag(token->name())
1941 || isTableCellContextTag(token->name())
1942 || token->name() == trTag) {
1947 // Is this redirection necessary here?
1948 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1949 processEndTagForInBody(token);
// Routes an end-tag token to the handling for the current insertion mode.
// The token must be an EndTag (asserted). Branches either handle the tag
// inline, delegate to a per-mode helper (processEndTagForInBody / InTable /
// InRow / InCell / InTableBody), or change mode and feed the token back into
// processEndTag().
// NOTE(review): some case labels, braces and return/break lines are not
// visible in this view of the file; the comments below describe only the
// statements that are shown.
1952 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
1954 ASSERT(token->type() == HTMLToken::EndTag);
1955 switch (insertionMode()) {
// Initial mode: run the initial-mode default handling.
1957 ASSERT(insertionMode() == InitialMode);
1958 defaultForInitial();
// Before <html>: the check singles out end tags other than </head>, </body>,
// </html> and </br>; the body of that branch is not visible here.
1960 case BeforeHTMLMode:
1961 ASSERT(insertionMode() == BeforeHTMLMode);
1962 if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1966 defaultForBeforeHTML();
// Before <head>: same name check as above, followed by the before-head
// default handling.
1968 case BeforeHeadMode:
1969 ASSERT(insertionMode() == BeforeHeadMode);
1970 if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1974 defaultForBeforeHead();
// In <head>: </template> goes to processTemplateEndTag(); </head> pops the
// head element and enters AfterHeadMode.
1977 ASSERT(insertionMode() == InHeadMode);
1978 // FIXME: This case should be broken out into processEndTagForInHead,
1979 // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
1980 // but because the logic falls through to AfterHeadMode, that gets a little messy.
1981 if (token->name() == templateTag) {
1982 processTemplateEndTag(token);
1985 if (token->name() == headTag) {
1986 m_tree.openElements()->popHTMLHeadElement();
1987 setInsertionMode(AfterHeadMode);
1990 if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
// After </head>: the check singles out end tags other than </body>, </html>
// and </br>; the after-head default handling follows.
1997 ASSERT(insertionMode() == AfterHeadMode);
1998 if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
2002 defaultForAfterHead();
// In body / in table: delegate to the dedicated per-mode helpers.
2005 ASSERT(insertionMode() == InBodyMode);
2006 processEndTagForInBody(token);
2009 ASSERT(insertionMode() == InTableMode);
2010 processEndTagForInTable(token);
// In <caption>: </caption> closes the caption. </table> closes the caption
// first (a failed close is asserted to be fragment parsing) and is then
// reprocessed.
2013 ASSERT(insertionMode() == InCaptionMode);
2014 if (token->name() == captionTag) {
2015 processCaptionEndTagForInCaption();
2018 if (token->name() == tableTag) {
2020 if (!processCaptionEndTagForInCaption()) {
2021 ASSERT(isParsingFragment());
2024 processEndTag(token);
// These end tags get their own branch, whose body is not visible here.
2027 if (token->name() == bodyTag
2028 || token->name() == colTag
2029 || token->name() == colgroupTag
2030 || token->name() == htmlTag
2031 || isTableBodyContextTag(token->name())
2032 || isTableCellContextTag(token->name())
2033 || token->name() == trTag) {
2037 processEndTagForInBody(token);
// In <colgroup>: </colgroup> closes the column group and </template> goes to
// processTemplateEndTag(). The remaining statements close the column group
// and reprocess the token.
2039 case InColumnGroupMode:
2040 ASSERT(insertionMode() == InColumnGroupMode);
2041 if (token->name() == colgroupTag) {
2042 processColgroupEndTagForInColumnGroup();
2045 if (token->name() == colTag) {
2049 if (token->name() == templateTag) {
2050 processTemplateEndTag(token);
2053 if (!processColgroupEndTagForInColumnGroup()) {
2054 ASSERT(isParsingFragmentOrTemplateContents());
2057 processEndTag(token);
// Row, cell and table-body modes: delegate to the dedicated helpers.
2060 ASSERT(insertionMode() == InRowMode);
2061 processEndTagForInRow(token);
2064 ASSERT(insertionMode() == InCellMode);
2065 processEndTagForInCell(token);
2067 case InTableBodyMode:
2068 ASSERT(insertionMode() == InTableBodyMode);
2069 processEndTagForInTableBody(token);
// After body: for </html>, the fragment-parsing case is checked first; the
// other visible statement moves to AfterAfterBodyMode.
2072 ASSERT(insertionMode() == AfterBodyMode);
2073 if (token->name() == htmlTag) {
2074 if (isParsingFragment()) {
2078 setInsertionMode(AfterAfterBodyMode);
// After body / after-after body (per the assert): switch back to InBodyMode
// and reprocess the token.
2082 case AfterAfterBodyMode:
2083 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2085 setInsertionMode(InBodyMode);
2086 processEndTag(token);
// In <noscript> inside <head>: </noscript> pops the noscript element (the
// asserts expect <noscript> on top with <head> beneath it) and returns to
// InHeadMode.
2088 case InHeadNoscriptMode:
2089 ASSERT(insertionMode() == InHeadNoscriptMode);
2090 if (token->name() == noscriptTag) {
2091 ASSERT(m_tree.currentStackItem()->hasTagName(noscriptTag));
2092 m_tree.openElements()->pop();
2093 ASSERT(m_tree.currentStackItem()->hasTagName(headTag));
2094 setInsertionMode(InHeadMode);
2097 if (token->name() != brTag) {
2101 defaultForInHeadNoscript();
2102 processToken(token);
// NOTE(review): the case label for the section below is not visible here; it
// restores m_originalInsertionMode, which suggests the text insertion mode -
// confirm.
// </script>: when the parser content policy allows scripting, the current
// element is recorded in m_scriptToProcess. The element is popped and the
// original insertion mode restored.
2105 if (token->name() == scriptTag) {
2106 // Pause ourselves so that parsing stops until the script can be processed by the caller.
2107 ASSERT(m_tree.currentStackItem()->hasTagName(scriptTag));
2108 if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2109 m_scriptToProcess = m_tree.currentElement();
2110 m_tree.openElements()->pop();
2111 setInsertionMode(m_originalInsertionMode);
2113 if (m_parser->tokenizer()) {
2114 // We must set the tokenizer's state to
2115 // DataState explicitly if the tokenizer didn't have a chance to.
2116 ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_options.useThreading);
2117 m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
// Other end tags in this section: pop the current element and restore the
// original insertion mode.
2121 m_tree.openElements()->pop();
2122 setInsertionMode(m_originalInsertionMode);
// In <frameset>: </frameset> is checked against two conditions, the current
// node being the root or a <template> being in HTML scope; either is asserted
// to mean fragment or template-contents parsing. The visible follow-up pops
// the frameset and, outside fragment parsing, enters AfterFramesetMode once
// the current item is no longer a <frameset>.
2124 case InFramesetMode:
2125 ASSERT(insertionMode() == InFramesetMode);
2126 if (token->name() == framesetTag) {
2127 bool ignoreFramesetForFragmentParsing = m_tree.currentIsRootNode();
2128 ignoreFramesetForFragmentParsing = ignoreFramesetForFragmentParsing || m_tree.openElements()->hasTemplateInHTMLScope();
2129 if (ignoreFramesetForFragmentParsing) {
2130 ASSERT(isParsingFragmentOrTemplateContents());
2134 m_tree.openElements()->pop();
2135 if (!isParsingFragment() && !m_tree.currentStackItem()->hasTagName(framesetTag))
2136 setInsertionMode(AfterFramesetMode);
2139 if (token->name() == templateTag) {
2140 processTemplateEndTag(token);
// After frameset: </html> moves to AfterAfterFramesetMode.
2144 case AfterFramesetMode:
2145 ASSERT(insertionMode() == AfterFramesetMode);
2146 if (token->name() == htmlTag) {
2147 setInsertionMode(AfterAfterFramesetMode);
2151 case AfterAfterFramesetMode:
2152 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
// In <select> inside a table: for table-structure end tags whose element is
// in table scope, a synthesized </select> is processed and the token is then
// reprocessed.
2155 case InSelectInTableMode:
2156 ASSERT(insertionMode() == InSelectInTableMode);
2157 if (token->name() == captionTag
2158 || token->name() == tableTag
2159 || isTableBodyContextTag(token->name())
2160 || token->name() == trTag
2161 || isTableCellContextTag(token->name())) {
2163 if (m_tree.openElements()->inTableScope(token->name())) {
2164 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2165 processEndTag(&endSelect);
2166 processEndTag(token);
// In <select> (the assert also admits InSelectInTableMode).
2172 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
// </optgroup>: an <option> on top of the stack that sits directly above an
// <optgroup> is closed with a fake end tag first; the <optgroup> is popped
// when it is the current item.
2173 if (token->name() == optgroupTag) {
2174 if (m_tree.currentStackItem()->hasTagName(optionTag) && m_tree.oneBelowTop() && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2175 processFakeEndTag(optionTag);
2176 if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
2177 m_tree.openElements()->pop();
// </option>: popped only when an <option> is the current item.
2183 if (token->name() == optionTag) {
2184 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
2185 m_tree.openElements()->pop();
// </select>: when no <select> is in select scope, fragment parsing is
// asserted. The close path pops through the select and recomputes the
// insertion mode.
2191 if (token->name() == selectTag) {
2192 if (!m_tree.openElements()->inSelectScope(token->name())) {
2193 ASSERT(isParsingFragment());
2197 m_tree.openElements()->popUntilPopped(selectTag.localName());
2198 resetInsertionModeAppropriately();
2201 if (token->name() == templateTag) {
2202 processTemplateEndTag(token);
// In table text: run the table-text default handling, then reprocess the
// token.
2206 case InTableTextMode:
2207 defaultForInTableText();
2208 processEndTag(token);
// Template contents: only </template> has visible handling.
2210 case TemplateContentsMode:
2211 if (token->name() == templateTag) {
2212 processTemplateEndTag(token);
// Inserts a comment token at the place dictated by m_insertionMode. The token
// must be a Comment (asserted).
//  - Initial, before-html, after-after-body and after-after-frameset modes:
//    the comment is inserted on the document.
//  - After-body mode: the comment is inserted on the <html> element.
//  - In-table-text mode: the table-text default handling runs first and the
//    comment is then reprocessed.
//  - The remaining visible path is m_tree.insertComment().
// NOTE(review): the return lines that end each branch are not visible in this
// view of the file.
2219 void HTMLTreeBuilder::processComment(AtomicHTMLToken* token)
2221 ASSERT(token->type() == HTMLToken::Comment);
2222 if (m_insertionMode == InitialMode
2223 || m_insertionMode == BeforeHTMLMode
2224 || m_insertionMode == AfterAfterBodyMode
2225 || m_insertionMode == AfterAfterFramesetMode) {
2226 m_tree.insertCommentOnDocument(token);
2229 if (m_insertionMode == AfterBodyMode) {
2230 m_tree.insertCommentOnHTMLHtmlElement(token);
2233 if (m_insertionMode == InTableTextMode) {
2234 defaultForInTableText();
2235 processComment(token);
2238 m_tree.insertComment(token);
// Entry point for a character token: asserts the token type, wraps the token
// in a CharacterTokenBuffer and hands the buffer to processCharacterBuffer().
2241 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken* token)
2243 ASSERT(token->type() == HTMLToken::Character);
2244 CharacterTokenBuffer buffer(token);
2245 processCharacterBuffer(buffer);
// Consumes the characters in |buffer| according to insertionMode().
//
// Handling visible in this function, by mode:
//   - Initial / BeforeHTML / BeforeHead: leading whitespace is skipped, then
//     the mode's defaultFor...() handler runs.
//   - InHead / AfterHead / InColumnGroup / InHeadNoscript: leading whitespace
//     is taken from the buffer and inserted as an AllWhitespace text node
//     before any further handling.
//   - InTable / InTableBody / InRow: either switch to InTableTextMode (when
//     the current stack item is a table, tbody, tfoot, thead or tr element)
//     or insert through processCharacterBufferForInBody() with a
//     RedirectToFosterParentGuard in scope.
//   - InTableText: remaining characters are appended to
//     m_pendingTableCharacters.
//   - Text / InSelect / InSelectInTable: everything remaining is inserted as
//     a text node.
//   - Frameset-related modes: only whitespace taken via
//     takeRemainingWhitespace() is inserted.
// Modes that change the insertion mode jump back with "goto ReprocessBuffer".
// NOTE(review): braces, returns, the ReprocessBuffer label and several case
// labels are not visible in this excerpt; confirm control flow against the
// full file.
2248 void HTMLTreeBuilder::processCharacterBuffer(CharacterTokenBuffer& buffer)
2251 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2252 // Note that this logic is different than the generic \r\n collapsing
2253 // handled in the input stream preprocessor. This logic is here as an
2254 // "authoring convenience" so folks can write:
2261 // without getting an extra newline at the start of their <pre> element.
2262 if (m_shouldSkipLeadingNewline) {
// The flag is cleared as soon as it is consumed, so it applies at most once.
2263 m_shouldSkipLeadingNewline = false;
2264 buffer.skipAtMostOneLeadingNewline();
2265 if (buffer.isEmpty())
2269 switch (insertionMode()) {
2271 ASSERT(insertionMode() == InitialMode);
2272 buffer.skipLeadingWhitespace();
2273 if (buffer.isEmpty())
2275 defaultForInitial();
2278 case BeforeHTMLMode: {
2279 ASSERT(insertionMode() == BeforeHTMLMode);
2280 buffer.skipLeadingWhitespace();
2281 if (buffer.isEmpty())
2283 defaultForBeforeHTML();
2286 case BeforeHeadMode: {
2287 ASSERT(insertionMode() == BeforeHeadMode);
2288 buffer.skipLeadingWhitespace();
2289 if (buffer.isEmpty())
2291 defaultForBeforeHead();
2295 ASSERT(insertionMode() == InHeadMode);
2296 String leadingWhitespace = buffer.takeLeadingWhitespace();
2297 if (!leadingWhitespace.isEmpty())
2298 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2299 if (buffer.isEmpty())
2304 case AfterHeadMode: {
2305 ASSERT(insertionMode() == AfterHeadMode);
2306 String leadingWhitespace = buffer.takeLeadingWhitespace();
2307 if (!leadingWhitespace.isEmpty())
2308 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2309 if (buffer.isEmpty())
2311 defaultForAfterHead();
2316 case TemplateContentsMode:
2318 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode || insertionMode() == TemplateContentsMode);
2319 processCharacterBufferForInBody(buffer);
2323 case InTableBodyMode:
2325 ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2326 ASSERT(m_pendingTableCharacters.isEmpty());
2327 if (m_tree.currentStackItem()->isElementNode()
2328 && (m_tree.currentStackItem()->hasTagName(tableTag)
2329 || m_tree.currentStackItem()->hasTagName(tbodyTag)
2330 || m_tree.currentStackItem()->hasTagName(tfootTag)
2331 || m_tree.currentStackItem()->hasTagName(theadTag)
2332 || m_tree.currentStackItem()->hasTagName(trTag))) {
// Save the current mode; defaultForInTableText() later restores it from
// m_originalInsertionMode.
2333 m_originalInsertionMode = m_insertionMode;
2334 setInsertionMode(InTableTextMode);
2335 // Note that we fall through to the InTableTextMode case below.
// The guard object is in scope for the in-body insertion that follows.
2337 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2338 processCharacterBufferForInBody(buffer);
2343 case InTableTextMode: {
// Characters are only accumulated here; defaultForInTableText() inserts them.
2344 buffer.giveRemainingTo(m_pendingTableCharacters);
2347 case InColumnGroupMode: {
2348 ASSERT(insertionMode() == InColumnGroupMode);
2349 String leadingWhitespace = buffer.takeLeadingWhitespace();
2350 if (!leadingWhitespace.isEmpty())
2351 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2352 if (buffer.isEmpty())
2354 if (!processColgroupEndTagForInColumnGroup()) {
2355 ASSERT(isParsingFragmentOrTemplateContents());
2356 // The spec tells us to drop these characters on the floor.
2357 buffer.skipLeadingNonWhitespace();
2358 if (buffer.isEmpty())
2361 goto ReprocessBuffer;
2364 case AfterAfterBodyMode: {
2365 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2366 // FIXME: parse error
// Switch to InBodyMode and handle the same buffer again under that mode.
2367 setInsertionMode(InBodyMode);
2368 goto ReprocessBuffer;
2371 ASSERT(insertionMode() == TextMode);
2372 m_tree.insertTextNode(buffer.takeRemaining());
2375 case InHeadNoscriptMode: {
2376 ASSERT(insertionMode() == InHeadNoscriptMode);
2377 String leadingWhitespace = buffer.takeLeadingWhitespace();
2378 if (!leadingWhitespace.isEmpty())
2379 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2380 if (buffer.isEmpty())
2382 defaultForInHeadNoscript();
2383 goto ReprocessBuffer;
2385 case InFramesetMode:
2386 case AfterFramesetMode: {
2387 ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
// Despite the variable name, this is filled by takeRemainingWhitespace(),
// not takeLeadingWhitespace().
2388 String leadingWhitespace = buffer.takeRemainingWhitespace();
2389 if (!leadingWhitespace.isEmpty())
2390 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2391 // FIXME: We should generate a parse error if we skipped over any
2392 // non-whitespace characters.
2395 case InSelectInTableMode:
2396 case InSelectMode: {
2397 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2398 m_tree.insertTextNode(buffer.takeRemaining());
2401 case AfterAfterFramesetMode: {
2402 String leadingWhitespace = buffer.takeRemainingWhitespace();
2403 if (!leadingWhitespace.isEmpty()) {
2404 m_tree.reconstructTheActiveFormattingElements();
2405 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2407 // FIXME: We should generate a parse error if we skipped over any
2408 // non-whitespace characters.
// In-body handling of character data: reconstructs the active formatting
// elements, inserts everything remaining in |buffer| as a text node, and
// clears m_framesetOk if the inserted text is not made up solely of
// whitespace or replacement characters.
2414 void HTMLTreeBuilder::processCharacterBufferForInBody(CharacterTokenBuffer& buffer)
2416 m_tree.reconstructTheActiveFormattingElements();
2417 const String& characters = buffer.takeRemaining();
2418 m_tree.insertTextNode(characters);
// Only test the text while the flag is still set; once false it stays false here.
2419 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2420 m_framesetOk = false;
// Handles the EndOfFile token according to insertionMode().
//
// Visible behavior:
//   - Initial / BeforeHTML / BeforeHead / AfterHead run their defaultFor...()
//     handler.
//   - InHeadNoscript and InTableText run their default handler and then
//     re-enter processEndOfFile() with the same token.
//   - The block that pops the current element restores
//     m_originalInsertionMode and re-enters processEndOfFile().
//   - Modes that may be inside template contents consult
//     processEndOfFileForInTemplateContents().
//   - The last visible statement hands off to m_tree.processEndOfFile().
// NOTE(review): breaks, returns, fall-through comments and several case
// labels are not visible in this excerpt; confirm control flow against the
// full file.
2423 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token)
2425 ASSERT(token->type() == HTMLToken::EndOfFile);
2426 switch (insertionMode()) {
2428 ASSERT(insertionMode() == InitialMode);
2429 defaultForInitial();
2431 case BeforeHTMLMode:
2432 ASSERT(insertionMode() == BeforeHTMLMode);
2433 defaultForBeforeHTML();
2435 case BeforeHeadMode:
2436 ASSERT(insertionMode() == BeforeHeadMode);
2437 defaultForBeforeHead();
2440 ASSERT(insertionMode() == InHeadMode);
2444 ASSERT(insertionMode() == AfterHeadMode);
2445 defaultForAfterHead();
2451 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode || insertionMode() == TemplateContentsMode);
2452 notImplemented(); // Emit parse error based on what elements are still open.
// Template handling is only attempted while template insertion modes are pending.
2453 if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
2457 case AfterAfterBodyMode:
2458 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2460 case InHeadNoscriptMode:
2461 ASSERT(insertionMode() == InHeadNoscriptMode);
2462 defaultForInHeadNoscript();
2463 processEndOfFile(token);
2465 case AfterFramesetMode:
2466 case AfterAfterFramesetMode:
2467 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2469 case InColumnGroupMode:
2470 if (m_tree.currentIsRootNode()) {
2471 ASSERT(isParsingFragment());
2472 return; // FIXME: Should we break here instead of returning?
2474 ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || isHTMLTemplateElement(m_tree.currentNode()));
2475 processColgroupEndTagForInColumnGroup();
2477 case InFramesetMode:
2479 case InTableBodyMode:
2480 case InSelectInTableMode:
2482 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode || insertionMode() == InColumnGroupMode);
2483 if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2485 if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
2488 case InTableTextMode:
// Flush the pending table characters, then handle EOF under the restored mode.
2489 defaultForInTableText();
2490 processEndOfFile(token);
2494 if (m_tree.currentStackItem()->hasTagName(scriptTag))
2495 notImplemented(); // mark the script element as "already started".
// Pop the current element, return to the saved insertion mode and handle
// EOF again under that mode.
2496 m_tree.openElements()->pop();
2497 ASSERT(m_originalInsertionMode != TextMode);
2498 setInsertionMode(m_originalInsertionMode);
2499 processEndOfFile(token);
2501 case TemplateContentsMode:
2502 if (processEndOfFileForInTemplateContents(token))
2506 m_tree.processEndOfFile();
// Default action for InitialMode: calls m_tree.setDefaultCompatibilityMode()
// and advances the insertion mode to BeforeHTMLMode.
2509 void HTMLTreeBuilder::defaultForInitial()
2512 m_tree.setDefaultCompatibilityMode();
2513 // FIXME: parse error
2514 setInsertionMode(BeforeHTMLMode);
// Default action for BeforeHTMLMode: builds a synthetic StartTag token named
// after htmlTag, inserts it via insertHTMLHtmlStartTagBeforeHTML(), and
// advances the insertion mode to BeforeHeadMode.
2517 void HTMLTreeBuilder::defaultForBeforeHTML()
2519 AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2520 m_tree.insertHTMLHtmlStartTagBeforeHTML(&startHTML);
2521 setInsertionMode(BeforeHeadMode);
// Default action for BeforeHeadMode: builds a synthetic StartTag token named
// after headTag and runs it through processStartTag().
2524 void HTMLTreeBuilder::defaultForBeforeHead()
2526 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2527 processStartTag(&startHead);
// Default action for InHeadMode: builds a synthetic EndTag token named after
// headTag and runs it through processEndTag().
2530 void HTMLTreeBuilder::defaultForInHead()
2532 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2533 processEndTag(&endHead);
// Default action for InHeadNoscriptMode: builds a synthetic EndTag token named
// after noscriptTag and runs it through processEndTag().
2536 void HTMLTreeBuilder::defaultForInHeadNoscript()
2538 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2539 processEndTag(&endNoscript);
// Default action for AfterHeadMode: builds a synthetic StartTag token named
// after bodyTag, runs it through processStartTag(), then sets m_framesetOk
// back to true.
2542 void HTMLTreeBuilder::defaultForAfterHead()
2544 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2545 processStartTag(&startBody);
// NOTE(review): presumably processing the body start tag clears m_framesetOk
// and an implied body should not count; confirm against the body start-tag
// handling.
2546 m_framesetOk = true;
// Flushes the characters accumulated in m_pendingTableCharacters and leaves
// InTableTextMode.
//
// If the pending text contains non-whitespace, it is inserted with a
// RedirectToFosterParentGuard in scope, after reconstructing the active
// formatting elements, and m_framesetOk is cleared. The other visible path
// inserts the text directly. Both visible paths restore the insertion mode
// from m_originalInsertionMode.
// NOTE(review): the statement separating the two paths is not visible in this
// excerpt; confirm that they are mutually exclusive.
2549 void HTMLTreeBuilder::defaultForInTableText()
// Copy out the pending text before clearing the builder.
2551 String characters = m_pendingTableCharacters.toString();
2552 m_pendingTableCharacters.clear();
2553 if (!isAllWhitespace(characters)) {
2554 // FIXME: parse error
2555 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2556 m_tree.reconstructTheActiveFormattingElements();
2557 m_tree.insertTextNode(characters, NotAllWhitespace);
2558 m_framesetOk = false;
2559 setInsertionMode(m_originalInsertionMode);
2562 m_tree.insertTextNode(characters);
2563 setInsertionMode(m_originalInsertionMode);
// Handles a StartTag token under the in-head rules, dispatching on the tag
// name to the matching helper (self-closing insert, RCDATA, raw text, script,
// template, ...).
// Returns a bool. NOTE(review): the return statements are not visible in this
// excerpt; presumably true means the token was handled here. Confirm.
2566 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken* token)
2568 ASSERT(token->type() == HTMLToken::StartTag);
2569 if (token->name() == htmlTag) {
2570 processHtmlStartTagForInBody(token);
// These tags are inserted as self-closing elements.
2573 if (token->name() == baseTag
2574 || token->name() == basefontTag
2575 || token->name() == bgsoundTag
2576 || token->name() == commandTag
2577 || token->name() == linkTag
2578 || token->name() == metaTag) {
2579 m_tree.insertSelfClosingHTMLElement(token);
2580 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2583 if (token->name() == titleTag) {
2584 processGenericRCDATAStartTag(token);
// noscript: with scripting enabled it takes the raw-text path; the other
// visible path inserts the element and switches to InHeadNoscriptMode.
2587 if (token->name() == noscriptTag) {
2588 if (m_options.scriptEnabled) {
2589 processGenericRawTextStartTag(token);
2592 m_tree.insertHTMLElement(token);
2593 setInsertionMode(InHeadNoscriptMode);
2596 if (token->name() == noframesTag || token->name() == styleTag) {
2597 processGenericRawTextStartTag(token);
2600 if (token->name() == scriptTag) {
2601 processScriptStartTag(token);
2604 if (token->name() == templateTag) {
2605 processTemplateStartTag(token);
2608 if (token->name() == headTag) {
// Start-tag handling for RCDATA elements: inserts the element, switches the
// tokenizer to RCDATAState, saves the current insertion mode in
// m_originalInsertionMode, and enters TextMode.
2615 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
2617 ASSERT(token->type() == HTMLToken::StartTag);
2618 m_tree.insertHTMLElement(token);
// The tokenizer state is only changed when the parser exposes a tokenizer.
2619 if (m_parser->tokenizer())
2620 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2621 m_originalInsertionMode = m_insertionMode;
2622 setInsertionMode(TextMode);
// Start-tag handling for raw-text elements: inserts the element, switches the
// tokenizer to RAWTEXTState, saves the current insertion mode in
// m_originalInsertionMode, and enters TextMode.
2625 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
2627 ASSERT(token->type() == HTMLToken::StartTag);
2628 m_tree.insertHTMLElement(token);
// The tokenizer state is only changed when the parser exposes a tokenizer.
2629 if (m_parser->tokenizer())
2630 m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2631 m_originalInsertionMode = m_insertionMode;
2632 setInsertionMode(TextMode);
// Start-tag handling for script: inserts the script element, switches the
// tokenizer to ScriptDataState, saves the current insertion mode in
// m_originalInsertionMode, stores the parser's current text position in
// m_scriptToProcessStartPosition, and enters TextMode.
2635 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
2637 ASSERT(token->type() == HTMLToken::StartTag);
2638 m_tree.insertScriptElement(token);
// The tokenizer state is only changed when the parser exposes a tokenizer.
2639 if (m_parser->tokenizer())
2640 m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2641 m_originalInsertionMode = m_insertionMode;
2643 TextPosition position = m_parser->textPosition();
2645 m_scriptToProcessStartPosition = position;
2647 setInsertionMode(TextMode);
2650 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
// Decides whether |token| should be handled under the foreign-content rules.
// The visible checks, in order, look at:
//   - whether the tree is empty;
//   - whether the adjusted current stack item is in the HTML namespace;
//   - MathML text integration points (StartTag other than mglyph/malignmark,
//     or Character tokens);
//   - an annotation-xml current node receiving an svg StartTag;
//   - HTML integration points (StartTag or Character tokens);
//   - EndOfFile tokens.
// NOTE(review): the value returned for each check is not visible in this
// excerpt; confirm against the full file before relying on this summary.
2651 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken* token)
2653 if (m_tree.isEmpty())
2655 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2657 if (adjustedCurrentNode->isInHTMLNamespace())
2659 if (HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode)) {
2660 if (token->type() == HTMLToken::StartTag
2661 && token->name() != MathMLNames::mglyphTag
2662 && token->name() != MathMLNames::malignmarkTag)
2664 if (token->type() == HTMLToken::Character)
2667 if (adjustedCurrentNode->hasTagName(MathMLNames::annotation_xmlTag)
2668 && token->type() == HTMLToken::StartTag
2669 && token->name() == SVGNames::svgTag)
2671 if (HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)) {
2672 if (token->type() == HTMLToken::StartTag)
2674 if (token->type() == HTMLToken::Character)
2677 if (token->type() == HTMLToken::EndOfFile)
// Handles |token| under the foreign-content (MathML/SVG) rules.
//
// Character tokens are inserted as text, clearing m_framesetOk when the text
// is not all whitespace/replacement characters. For other tokens the tree is
// flushed first, then handling switches on the token type:
//   - StartTag: a fixed list of HTML tag names (plus font with a color, face
//     or size attribute) pops the stack to the foreign content scope marker
//     and reprocesses the token via processStartTag(); otherwise attributes
//     (and, for SVG, the tag name case) are adjusted and the element is
//     inserted in the current namespace.
//   - EndTag: special-cases the SVG script element, otherwise searches the
//     open-element records for a matching local name.
//   - Comment: inserted via m_tree.insertComment().
// NOTE(review): braces, returns, breaks and the loop header of the stack walk
// are not visible in this excerpt; confirm control flow against the full file.
2682 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken* token)
2684 if (token->type() == HTMLToken::Character) {
2685 const String& characters = token->characters();
2686 m_tree.insertTextNode(characters);
2687 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2688 m_framesetOk = false;
2692 m_tree.flush(FlushAlways);
2693 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2695 switch (token->type()) {
2696 case HTMLToken::Uninitialized:
2697 ASSERT_NOT_REACHED();
2699 case HTMLToken::DOCTYPE:
2702 case HTMLToken::StartTag: {
// Tag names that leave foreign content and are reprocessed by processStartTag().
2703 if (token->name() == bTag
2704 || token->name() == bigTag
2705 || token->name() == blockquoteTag
2706 || token->name() == bodyTag
2707 || token->name() == brTag
2708 || token->name() == centerTag
2709 || token->name() == codeTag
2710 || token->name() == ddTag
2711 || token->name() == divTag
2712 || token->name() == dlTag
2713 || token->name() == dtTag
2714 || token->name() == emTag
2715 || token->name() == embedTag
2716 || isNumberedHeaderTag(token->name())
2717 || token->name() == headTag
2718 || token->name() == hrTag
2719 || token->name() == iTag
2720 || token->name() == imgTag
2721 || token->name() == liTag
2722 || token->name() == listingTag
2723 || token->name() == menuTag
2724 || token->name() == metaTag
2725 || token->name() == nobrTag
2726 || token->name() == olTag
2727 || token->name() == pTag
2728 || token->name() == preTag
2729 || token->name() == rubyTag
2730 || token->name() == sTag
2731 || token->name() == smallTag
2732 || token->name() == spanTag
2733 || token->name() == strongTag
2734 || token->name() == strikeTag
2735 || token->name() == subTag
2736 || token->name() == supTag
2737 || token->name() == tableTag
2738 || token->name() == ttTag
2739 || token->name() == uTag
2740 || token->name() == ulTag
2741 || token->name() == varTag
2742 || (token->name() == fontTag && (token->getAttributeItem(colorAttr) || token->getAttributeItem(faceAttr) || token->getAttributeItem(sizeAttr)))) {
2744 m_tree.openElements()->popUntilForeignContentScopeMarker();
2745 processStartTag(token);
// Otherwise: adjust the token for the adjusted current node's namespace and
// insert it as a foreign element in that namespace.
2748 const AtomicString& currentNamespace = adjustedCurrentNode->namespaceURI();
2749 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2750 adjustMathMLAttributes(token);
2751 if (currentNamespace == SVGNames::svgNamespaceURI) {
2752 adjustSVGTagNameCase(token);
2753 adjustSVGAttributes(token);
2755 adjustForeignAttributes(token);
2756 m_tree.insertForeignElement(token, currentNamespace);
2759 case HTMLToken::EndTag: {
2760 if (adjustedCurrentNode->namespaceURI() == SVGNames::svgNamespaceURI)
2761 adjustSVGTagNameCase(token);
// SVG script end tag: record the current element in m_scriptToProcess when
// scripting content is allowed, then pop it.
2763 if (token->name() == SVGNames::scriptTag && m_tree.currentStackItem()->hasTagName(SVGNames::scriptTag)) {
2764 if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2765 m_scriptToProcess = m_tree.currentElement();
2766 m_tree.openElements()->pop();
2769 if (!m_tree.currentStackItem()->isInHTMLNamespace()) {
2770 // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
// Start from the top record and follow next(); a record whose local name
// matches the token pops the stack through that element.
2771 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2772 if (!nodeRecord->stackItem()->hasLocalName(token->name()))
2775 if (nodeRecord->stackItem()->hasLocalName(token->name())) {
2776 m_tree.openElements()->popUntilPopped(nodeRecord->element());
2779 nodeRecord = nodeRecord->next();
2781 if (nodeRecord->stackItem()->isInHTMLNamespace())
2785 // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2786 processEndTag(token);
2789 case HTMLToken::Comment:
2790 m_tree.insertComment(token);
// Character tokens are handled at the top of this function; neither type is
// expected to reach this switch.
2792 case HTMLToken::Character:
2793 case HTMLToken::EndOfFile:
2794 ASSERT_NOT_REACHED();
// Finishes parsing. Fragment parsing is checked first; the remaining visible
// path asserts that no template insertion modes remain and that the builder
// is attached, then calls m_tree.finishedParsing().
// NOTE(review): the body of the isParsingFragment() branch is not visible in
// this excerpt; confirm what happens for fragments.
2799 void HTMLTreeBuilder::finished()
2801 if (isParsingFragment())
2804 ASSERT(m_templateInsertionModes.isEmpty());
2805 ASSERT(m_isAttached);
2806 // Warning, this may detach the parser. Do not do anything else after this.
2807 m_tree.finishedParsing();
// Parse-error hook taking the offending token. The parameter is unnamed, so
// the token cannot be referenced in the body; no body statements are visible
// in this excerpt.
2810 void HTMLTreeBuilder::parseError(AtomicHTMLToken*)
2814 } // namespace blink