2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #ifndef HTMLTreeBuilder_h
28 #define HTMLTreeBuilder_h
30 #include "core/html/parser/HTMLConstructionSite.h"
31 #include "core/html/parser/HTMLElementStack.h"
32 #include "core/html/parser/HTMLParserOptions.h"
33 #include "platform/heap/Handle.h"
34 #include "wtf/Noncopyable.h"
35 #include "wtf/PassOwnPtr.h"
36 #include "wtf/PassRefPtr.h"
37 #include "wtf/RefPtr.h"
38 #include "wtf/Vector.h"
39 #include "wtf/text/StringBuilder.h"
40 #include "wtf/text/TextPosition.h"
44 class AtomicHTMLToken;
46 class DocumentFragment;
52 class HTMLDocumentParser;
54 class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> {
55 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
// Factory for the HTMLDocument-based constructor: allocates an HTMLTreeBuilder
// with `new`, forwarding every argument unchanged, and passes the new object
// through adoptPtrWillBeNoop() to produce the returned pointer wrapper.
57 static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
59 return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
// Factory for the DocumentFragment-based constructor: allocates an
// HTMLTreeBuilder for `fragment` with `contextElement` as its context element,
// forwarding every argument unchanged. Unlike the HTMLDocument overload this
// one takes no reportErrors flag.
61 static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
63 return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
// Read-only access to the HTMLElementStack exposed by the construction site (m_tree).
68 const HTMLElementStack* openElements() const { return m_tree.openElements(); }
// True when m_fragmentContext holds a non-null DocumentFragment.
70 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
// True when the open-element stack reports hasTemplateInHTMLScope().
71 bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
// True when isParsingFragment() or isParsingTemplateContents() is true; the
// template check is only evaluated if the fragment check is false.
72 bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
// Feeds one token to the tree builder. Defined out of line.
// NOTE(review): presumably forwards to processToken() after some bookkeeping --
// confirm against the .cpp.
76 void constructTree(AtomicHTMLToken*);
// True when m_scriptToProcess is non-null, i.e. a <script> element is waiting
// to be processed before the parser resumes.
78 bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
79 // Must be called to take the parser-blocking script before calling the parser again.
// scriptStartPosition is taken by non-const reference, so it acts as an
// out-parameter.
// NOTE(review): presumably it receives m_scriptToProcessStartPosition and
// m_scriptToProcess is cleared by the call -- confirm against the .cpp.
80 PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
82 // Done, close any open tags, etc.
85 // Synchronously empty any queues, possibly creating more DOM nodes.
// Pure forwarder: the work is done by flush() on the construction site (m_tree).
86 void flush() { m_tree.flush(); }
// Stores the flag in m_shouldSkipLeadingNewline.
// NOTE(review): presumably consulted when the next character token is
// processed -- confirm against the .cpp.
88 void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
91 class CharacterTokenBuffer;
92 // Represents HTML5 "insertion mode"
93 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
101 TemplateContentsMode,
117 AfterAfterFramesetMode,
// Constructors; both are invoked by the static create() overloads with the
// same argument lists. The first builds into an HTMLDocument, the second into
// a DocumentFragment with a context element.
120 HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
121 HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
// Token handlers. processToken() accepts any token; the declarations after it
// are named for one token kind each (doctype, start tag, end tag, comment,
// character, end-of-file).
// NOTE(review): presumably processToken() dispatches on the token's type to
// these handlers -- confirm against the .cpp.
123 void processToken(AtomicHTMLToken*);
125 void processDoctypeToken(AtomicHTMLToken*);
126 void processStartTag(AtomicHTMLToken*);
127 void processEndTag(AtomicHTMLToken*);
128 void processComment(AtomicHTMLToken*);
129 void processCharacter(AtomicHTMLToken*);
130 void processEndOfFile(AtomicHTMLToken*);
// Start-tag and end-tag handlers specialised by insertion mode; the "ForInXxx"
// suffix names the mode. processStartTagForInHead is the only member of this
// group that returns bool.
// NOTE(review): what that result signals is not visible in this header --
// presumably "the token was handled"; confirm against the .cpp.
132 bool processStartTagForInHead(AtomicHTMLToken*);
133 void processStartTagForInBody(AtomicHTMLToken*);
134 void processStartTagForInTable(AtomicHTMLToken*);
135 void processEndTagForInBody(AtomicHTMLToken*);
136 void processEndTagForInTable(AtomicHTMLToken*);
137 void processEndTagForInTableBody(AtomicHTMLToken*);
138 void processEndTagForInRow(AtomicHTMLToken*);
139 void processEndTagForInCell(AtomicHTMLToken*);
// Handlers for one specific tag within one specific insertion mode. The four
// that take no token (table/caption/colgroup/tr end tags) operate without
// inspecting a token argument.
// NOTE(review): several of these return bool; what the result signals is not
// visible in this header -- confirm against the .cpp before relying on it.
141 void processIsindexStartTagForInBody(AtomicHTMLToken*);
142 void processHtmlStartTagForInBody(AtomicHTMLToken*);
143 bool processBodyEndTagForInBody(AtomicHTMLToken*);
144 bool processTableEndTagForInTable();
145 bool processCaptionEndTagForInCaption();
146 bool processColgroupEndTagForInColumnGroup();
147 bool processTrEndTagForInRow();
148 // FIXME: This function should be inlined into its one call site or it
149 // needs to assert which tokens it can be called with.
150 void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
// Character-data handlers operating on a CharacterTokenBuffer (a class that is
// only forward-declared in this header). The buffer is passed by non-const
// reference, so the callee may modify it.
// NOTE(review): presumably the buffer is consumed as characters are inserted --
// confirm against the .cpp.
152 void processCharacterBuffer(CharacterTokenBuffer&);
153 inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
// Helpers that process a start or end tag identified by name rather than by a
// token argument. processFakeStartTag() defaults to an empty attribute list;
// processFakeEndTag() is overloaded on QualifiedName and AtomicString.
// NOTE(review): "fake" presumably means the tag is synthesized by the tree
// builder instead of coming from the tokenizer -- confirm against the .cpp.
155 void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
156 void processFakeEndTag(const QualifiedName&);
157 void processFakeEndTag(const AtomicString&);
158 void processFakePEndTagIfPInButtonScope();
// Start-tag handlers for RCDATA elements, raw-text elements and <script>.
// NOTE(review): presumably these switch the tokenizer state and save the
// current insertion mode in m_originalInsertionMode -- confirm against the .cpp.
160 void processGenericRCDATAStartTag(AtomicHTMLToken*);
161 void processGenericRawTextStartTag(AtomicHTMLToken*);
162 void processScriptStartTag(AtomicHTMLToken*);
164 // Default processing for the different insertion modes.
// One function per mode, named after it; none takes a token argument.
165 void defaultForInitial();
166 void defaultForBeforeHTML();
167 void defaultForBeforeHead();
168 void defaultForInHead();
169 void defaultForInHeadNoscript();
170 void defaultForAfterHead();
171 void defaultForInTableText();
// Foreign-content support: a predicate deciding whether a token is processed
// in foreign content, and the handler that does so.
// NOTE(review): adjustedCurrentStackItem() presumably returns the fragment's
// context element stack item in the fragment case and the current stack item
// otherwise -- confirm against the .cpp.
173 inline HTMLStackItem* adjustedCurrentStackItem() const;
174 inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
175 void processTokenInForeignContent(AtomicHTMLToken*);
// attributesForIsindexInput() returns an attribute vector by value, built from
// the given token.
// NOTE(review): callTheAdoptionAgency() presumably implements the HTML5
// "adoption agency algorithm" for the end tag in the token -- confirm against
// the .cpp.
177 Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
179 void callTheAdoptionAgency(AtomicHTMLToken*);
// Parameterised at compile time on a predicate, shouldClose, which takes a
// const HTMLStackItem* and returns bool.
// NOTE(review): presumably the predicate selects which open elements are
// closed before the new tag is inserted -- confirm against the .cpp.
183 template <bool shouldClose(const HTMLStackItem*)>
184 void processCloseWhenNestedTag(AtomicHTMLToken*);
// Called with the offending token when a parse error is detected.
// NOTE(review): whether this reports, records or ignores the error is not
// visible in this header -- confirm against the .cpp.
186 void parseError(AtomicHTMLToken*);
// Returns the current insertion mode (m_insertionMode).
188 InsertionMode insertionMode() const { return m_insertionMode; }
// Overwrites m_insertionMode; m_originalInsertionMode is not touched.
189 void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
// NOTE(review): presumably implements the HTML5 "reset the insertion mode
// appropriately" algorithm using the open-element stack -- confirm against
// the .cpp.
191 void resetInsertionModeAppropriately();
// <template> handling: the start tag, the end tag, and end-of-file while in
// template contents.
// NOTE(review): the meaning of the two bool results is not visible in this
// header; m_templateInsertionModes is presumably updated by these -- confirm
// against the .cpp.
193 void processTemplateStartTag(AtomicHTMLToken*);
194 bool processTemplateEndTag(AtomicHTMLToken*);
195 bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
// State specific to fragment parsing: the target DocumentFragment and a stack
// item for the context element. Declared non-copyable and DISALLOW_ALLOCATION.
197 class FragmentParsingContext {
198 WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
199 DISALLOW_ALLOCATION();
// NOTE(review): the default constructor presumably leaves m_fragment null (the
// non-fragment case) -- confirm against the .cpp.
201 FragmentParsingContext();
202 FragmentParsingContext(DocumentFragment*, Element* contextElement);
203 ~FragmentParsingContext();
// Returns m_fragment as a raw pointer; HTMLTreeBuilder::isParsingFragment()
// tests this result for null.
205 DocumentFragment* fragment() const { return m_fragment; }
// Both accessors below ASSERT that m_fragment is set before using
// m_contextElementStackItem, so they are only valid when parsing a fragment.
206 Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
207 HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }
209 void trace(Visitor*);
212 RawPtrWillBeMember<DocumentFragment> m_fragment;
213 RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem;
// Fragment-parsing state; isParsingFragment() tests its fragment() for null.
220 FragmentParsingContext m_fragmentContext;
// Construction site: provides the open-element stack returned by
// openElements() and is the object flush() forwards to.
221 HTMLConstructionSite m_tree;
223 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
224 InsertionMode m_insertionMode;
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
227 InsertionMode m_originalInsertionMode;
// NOTE(review): presumably the HTML5 "stack of template insertion modes" --
// confirm against the .cpp.
229 Vector<InsertionMode> m_templateInsertionModes;
231 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
232 StringBuilder m_pendingTableCharacters;
// Written by setShouldSkipLeadingNewline().
234 bool m_shouldSkipLeadingNewline;
236 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
237 // from within parser actions. We also need it to track the current position.
238 RawPtrWillBeMember<HTMLDocumentParser> m_parser;
240 RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
241 TextPosition m_scriptToProcessStartPosition; // Starting text position of the script tag needing processing.
// NOTE(review): presumably a copy of the HTMLParserOptions passed to the
// constructor -- confirm against the .cpp.
243 HTMLParserOptions m_options;