2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #ifndef HTMLTreeBuilder_h
28 #define HTMLTreeBuilder_h
30 #include "core/html/parser/HTMLConstructionSite.h"
31 #include "core/html/parser/HTMLElementStack.h"
32 #include "core/html/parser/HTMLParserOptions.h"
33 #include "platform/heap/Handle.h"
34 #include "wtf/Noncopyable.h"
35 #include "wtf/PassOwnPtr.h"
36 #include "wtf/PassRefPtr.h"
37 #include "wtf/RefPtr.h"
38 #include "wtf/Vector.h"
39 #include "wtf/text/StringBuilder.h"
40 #include "wtf/text/TextPosition.h"
44 class AtomicHTMLToken;
46 class DocumentFragment;
52 class HTMLDocumentParser;
// Tree-construction side of the HTML parser as declared in this header:
// it accepts AtomicHTMLTokens through constructTree(), keeps track of the
// HTML5 insertion mode (m_insertionMode) and holds an HTMLConstructionSite
// member (m_tree). The class is final and non-copyable; the static create()
// overloads build instances.
54 class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> {
55 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
// Factory for the HTMLDocument constructor overload. Every argument is
// forwarded unchanged to the constructor and the freshly allocated builder is
// handed back through adoptPtrWillBeNoop().
57 static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
59 return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
// Factory for the DocumentFragment/contextElement constructor overload. Unlike
// the document overload it takes no reportErrors flag. Arguments are forwarded
// unchanged and the new builder is handed back through adoptPtrWillBeNoop().
61 static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
63 return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
// Read-only view of the element stack exposed by the construction site (m_tree).
68 const HTMLElementStack* openElements() const { return m_tree.openElements(); }
// True when the fragment parsing context holds a non-null DocumentFragment.
70 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
// True when the open-element stack reports a template in HTML scope.
71 bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
// True when either of the two predicates above holds.
72 bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
// Takes one AtomicHTMLToken for tree construction. Defined out of line.
// NOTE(review): how this relates to processToken() is not visible in this
// header - confirm in the .cpp.
76 void constructTree(AtomicHTMLToken*);
// True while m_scriptToProcess is non-null, i.e. a script element is waiting
// to be taken via takeScriptToProcess().
78 bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
79 // Must be called to take the parser-blocking script before calling the parser again.
// scriptStartPosition is an out-parameter (non-const reference).
// NOTE(review): presumably it receives m_scriptToProcessStartPosition and the
// pending script is cleared - confirm in the .cpp.
80 PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
82 // Done, close any open tags, etc.
85 // Synchronously flush pending text and queued tasks, possibly creating more DOM nodes.
86 // Flushing pending text depends on |mode|.
// Pure pass-through: the call is forwarded to the HTMLConstructionSite (m_tree).
87 void flush(FlushMode mode) { m_tree.flush(mode); }
// Stores the flag in m_shouldSkipLeadingNewline.
// NOTE(review): where the flag is consumed is not visible in this header.
89 void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
92 class CharacterTokenBuffer;
93 // Represents HTML5 "insertion mode"
94 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
102 TemplateContentsMode,
118 AfterAfterFramesetMode,
// Constructors invoked by the two static create() overloads: the first targets
// an HTMLDocument, the second a DocumentFragment with its context element.
121 HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
122 HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
// Token handlers, one per token kind named in the suffix (doctype, start tag,
// end tag, comment, character, end of file). All are defined out of line.
// NOTE(review): processToken presumably dispatches to the handlers below
// based on the token type - confirm in the .cpp.
124 void processToken(AtomicHTMLToken*);
126 void processDoctypeToken(AtomicHTMLToken*);
127 void processStartTag(AtomicHTMLToken*);
128 void processEndTag(AtomicHTMLToken*);
129 void processComment(AtomicHTMLToken*);
130 void processCharacter(AtomicHTMLToken*);
131 void processEndOfFile(AtomicHTMLToken*);
// Start/end tag handlers whose "ForInX" suffix names an HTML5 insertion mode
// (in head, in body, in table, in table body, in row, in cell).
// NOTE(review): only the InHead variant returns bool; the meaning of that
// result is not visible in this header - confirm in the .cpp.
133 bool processStartTagForInHead(AtomicHTMLToken*);
134 void processStartTagForInBody(AtomicHTMLToken*);
135 void processStartTagForInTable(AtomicHTMLToken*);
136 void processEndTagForInBody(AtomicHTMLToken*);
137 void processEndTagForInTable(AtomicHTMLToken*);
138 void processEndTagForInTableBody(AtomicHTMLToken*);
139 void processEndTagForInRow(AtomicHTMLToken*);
140 void processEndTagForInCell(AtomicHTMLToken*);
// Handlers for individual tags within a given insertion mode; the tag and the
// mode are both encoded in the function name. Four of the end-tag handlers
// take no token argument.
// NOTE(review): the bool results presumably report whether the end tag was
// actually processed - confirm in the .cpp.
142 void processIsindexStartTagForInBody(AtomicHTMLToken*);
143 void processHtmlStartTagForInBody(AtomicHTMLToken*);
144 bool processBodyEndTagForInBody(AtomicHTMLToken*);
145 bool processTableEndTagForInTable();
146 bool processCaptionEndTagForInCaption();
147 bool processColgroupEndTagForInColumnGroup();
148 bool processTrEndTagForInRow();
149 // FIXME: This function should be inlined into its one call site or it
150 // needs to assert which tokens it can be called with.
151 void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
// Character handling on a CharacterTokenBuffer (a nested class that is only
// forward-declared in this header). The buffer is passed by non-const
// reference, so the callee may modify it. The InBody variant is declared inline.
153 void processCharacterBuffer(CharacterTokenBuffer&);
154 inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
// Helpers that act on a tag identified by name instead of by a tokenizer
// token. processFakeStartTag defaults to an empty attribute vector, and
// processFakeEndTag is overloaded on QualifiedName and AtomicString.
// NOTE(review): presumably these synthesize a token and run it through the
// regular start/end tag handlers - confirm in the .cpp.
156 void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
157 void processFakeEndTag(const QualifiedName&);
158 void processFakeEndTag(const AtomicString&);
159 void processFakePEndTagIfPInButtonScope();
// Start-tag handlers for RCDATA elements, raw text elements and <script>.
// NOTE(review): these presumably switch the tokenizer state through m_parser,
// as the comment on that member suggests - confirm in the .cpp.
161 void processGenericRCDATAStartTag(AtomicHTMLToken*);
162 void processGenericRawTextStartTag(AtomicHTMLToken*);
163 void processScriptStartTag(AtomicHTMLToken*);
165 // Default processing for the different insertion modes.
// One function per insertion mode named in the suffix; none of them takes a
// token argument.
166 void defaultForInitial();
167 void defaultForBeforeHTML();
168 void defaultForBeforeHead();
169 void defaultForInHead();
170 void defaultForInHeadNoscript();
171 void defaultForAfterHead();
172 void defaultForInTableText();
// Foreign-content support.
// NOTE(review): the names match the HTML5 "adjusted current node" and "rules
// for parsing tokens in foreign content"; presumably
// shouldProcessTokenInForeignContent() decides whether
// processTokenInForeignContent() handles the token - confirm in the .cpp.
174 inline HTMLStackItem* adjustedCurrentStackItem() const;
175 inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
176 void processTokenInForeignContent(AtomicHTMLToken*);
// Returns a vector of attributes, by value, derived from the given token.
// NOTE(review): presumably used when an <isindex> start tag is expanded into
// an <input> element - confirm in the .cpp.
178 Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
// NOTE(review): named after the HTML5 "adoption agency algorithm" for
// mis-nested formatting elements - confirm the definition follows it.
180 void callTheAdoptionAgency(AtomicHTMLToken*);
// Member template parameterized on a compile-time predicate: shouldClose is a
// function that takes a const HTMLStackItem* and returns bool.
// NOTE(review): how the predicate is applied to the element stack is not
// visible in this header.
184 template <bool shouldClose(const HTMLStackItem*)>
185 void processCloseWhenNestedTag(AtomicHTMLToken*);
// Parse-error hook taking the offending token. Defined out of line.
// NOTE(review): what is reported, if anything, is not visible in this header.
187 void parseError(AtomicHTMLToken*);
// Plain getter and setter for the current insertion mode (m_insertionMode).
// The setter performs no validation and has no other effect.
189 InsertionMode insertionMode() const { return m_insertionMode; }
190 void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
// NOTE(review): named after the HTML5 "reset the insertion mode appropriately"
// algorithm - confirm the definition follows it.
192 void resetInsertionModeAppropriately();
// <template> handling: start tag, end tag, and end-of-file while in template
// contents.
// NOTE(review): the meaning of the two bool results is not visible in this
// header - confirm in the .cpp.
194 void processTemplateStartTag(AtomicHTMLToken*);
195 bool processTemplateEndTag(AtomicHTMLToken*);
196 bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
// Bundles the state needed for fragment parsing: the target DocumentFragment
// and a stack item for the context element. Non-copyable and marked
// DISALLOW_ALLOCATION(). HTMLTreeBuilder embeds it by value as
// m_fragmentContext.
198 class FragmentParsingContext {
199 WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
200 DISALLOW_ALLOCATION();
// NOTE(review): the default constructor presumably leaves m_fragment null,
// which is what isParsingFragment() tests for - confirm in the .cpp.
202 FragmentParsingContext();
203 FragmentParsingContext(DocumentFragment*, Element* contextElement);
204 ~FragmentParsingContext();
// May return null; callers use that to tell whether a fragment is being parsed.
206 DocumentFragment* fragment() const { return m_fragment; }
// Both context-element accessors assert that a fragment is set before
// touching m_contextElementStackItem.
207 Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
208 HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }
// Tracing hook taking a Visitor. Defined out of line.
210 void trace(Visitor*);
213 RawPtrWillBeMember<DocumentFragment> m_fragment;
214 RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem;
// Fragment parsing state; isParsingFragment() checks its fragment() for null.
221 FragmentParsingContext m_fragmentContext;
// Construction site that openElements() and flush() delegate to.
222 HTMLConstructionSite m_tree;
224 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
225 InsertionMode m_insertionMode;
227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
228 InsertionMode m_originalInsertionMode;
// NOTE(review): presumably the HTML5 "stack of template insertion modes" -
// confirm against its uses in the .cpp.
230 Vector<InsertionMode> m_templateInsertionModes;
232 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
233 StringBuilder m_pendingTableCharacters;
// Written by setShouldSkipLeadingNewline().
235 bool m_shouldSkipLeadingNewline;
237 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
238 // from within parser actions. We also need it to track the current position.
239 RawPtrWillBeMember<HTMLDocumentParser> m_parser;
241 RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
242 TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
// Held by value. NOTE(review): presumably a copy of the options passed to
// create() - confirm in the constructors.
244 HTMLParserOptions m_options;