2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef HTMLDocumentParser_h
27 #define HTMLDocumentParser_h
29 #include "core/dom/ParserContentPolicy.h"
30 #include "core/dom/ScriptableDocumentParser.h"
31 #include "core/fetch/ResourceClient.h"
32 #include "core/frame/UseCounter.h"
33 #include "core/html/parser/BackgroundHTMLInputStream.h"
34 #include "core/html/parser/CompactHTMLToken.h"
35 #include "core/html/parser/HTMLInputStream.h"
36 #include "core/html/parser/HTMLParserOptions.h"
37 #include "core/html/parser/HTMLPreloadScanner.h"
38 #include "core/html/parser/HTMLScriptRunnerHost.h"
39 #include "core/html/parser/HTMLSourceTracker.h"
40 #include "core/html/parser/HTMLToken.h"
41 #include "core/html/parser/HTMLTokenizer.h"
42 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
43 #include "core/html/parser/TextResourceDecoder.h"
44 #include "core/html/parser/XSSAuditor.h"
45 #include "core/html/parser/XSSAuditorDelegate.h"
46 #include "platform/text/SegmentedString.h"
47 #include "wtf/Deque.h"
48 #include "wtf/OwnPtr.h"
49 #include "wtf/WeakPtr.h"
50 #include "wtf/text/TextPosition.h"
54 class BackgroundHTMLParser;
55 class CompactHTMLToken;
57 class DocumentFragment;
59 class HTMLParserScheduler;
60 class HTMLScriptRunner;
61 class HTMLTreeBuilder;
62 class HTMLResourcePreloader;
63 class ScriptController;
64 class ScriptSourceCode;
68 class HTMLDocumentParser : public ScriptableDocumentParser, private HTMLScriptRunnerHost {
69 WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
70 WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser);
72 static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(HTMLDocument& document, bool reportErrors)
74 return adoptRefWillBeNoop(new HTMLDocumentParser(document, reportErrors));
76 virtual ~HTMLDocumentParser();
77 virtual void trace(Visitor*) OVERRIDE;
79 // Exposed for HTMLParserScheduler
80 void resumeParsingAfterYield();
82 static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent);
// Returns the raw HTMLTokenizer pointer obtained from m_tokenizer.get(); the
// parser's OwnPtr member keeps ownership of the tokenizer.
// NOTE(review): presumably null when no tokenizer has been created - confirm
// against the constructors in the .cpp.
84 HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
86 virtual TextPosition textPosition() const OVERRIDE FINAL;
87 virtual OrdinalNumber lineNumber() const OVERRIDE FINAL;
89 virtual void suspendScheduledTasks() OVERRIDE FINAL;
90 virtual void resumeScheduledTasks() OVERRIDE FINAL;
93 OwnPtr<CompactHTMLTokenStream> tokens;
94 PreloadRequestStream preloads;
95 XSSInfoStream xssInfos;
96 HTMLTokenizer::State tokenizerState;
97 HTMLTreeBuilderSimulator::State treeBuilderState;
98 HTMLInputCheckpoint inputCheckpoint;
99 TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
101 void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
102 void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&);
104 virtual void appendBytes(const char* bytes, size_t length) OVERRIDE;
105 virtual void flush() OVERRIDE FINAL;
106 virtual void setDecoder(PassOwnPtr<TextResourceDecoder>) OVERRIDE FINAL;
// Looks up the UseCounter for this parse by handing the Document returned by
// contextForParsingSession() to UseCounter::getFrom().
108 UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); }
111 virtual void insert(const SegmentedString&) OVERRIDE FINAL;
112 virtual void append(PassRefPtr<StringImpl>) OVERRIDE;
113 virtual void finish() OVERRIDE FINAL;
115 HTMLDocumentParser(HTMLDocument&, bool reportErrors);
116 HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy);
// Returns the raw HTMLTreeBuilder pointer obtained from m_treeBuilder.get();
// the m_treeBuilder member remains the holder of the tree builder.
// NOTE(review): may be null if the tree builder has not been set up - confirm
// against the constructors in the .cpp.
118 HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
120 void forcePlaintextForTextDocument();
123 static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
125 return adoptRefWillBeNoop(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy));
129 virtual void pinToMainThread() OVERRIDE FINAL;
130 virtual void detach() OVERRIDE FINAL;
131 virtual bool hasInsertionPoint() OVERRIDE FINAL;
132 virtual bool processingData() const OVERRIDE FINAL;
133 virtual void prepareToStopParsing() OVERRIDE FINAL;
134 virtual void stopParsing() OVERRIDE FINAL;
135 virtual bool isWaitingForScripts() const OVERRIDE FINAL;
136 virtual bool isExecutingScript() const OVERRIDE FINAL;
137 virtual void executeScriptsWaitingForResources() OVERRIDE FINAL;
139 // HTMLScriptRunnerHost
140 virtual void notifyScriptLoaded(Resource*) OVERRIDE FINAL;
// HTMLScriptRunnerHost override: returns a mutable reference to this parser's
// own input stream member (m_input).
141 virtual HTMLInputStream& inputStream() OVERRIDE FINAL { return m_input; }
// HTMLScriptRunnerHost override: true only when m_preloadScanner is non-null
// AND shouldUseThreading() is false, i.e. a scanner held by this parser is
// never reported while the threaded path is in use.
142 virtual bool hasPreloadScanner() const OVERRIDE FINAL { return m_preloadScanner.get() && !shouldUseThreading(); }
143 virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE FINAL;
145 void startBackgroundParser();
146 void stopBackgroundParser();
147 void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk);
148 void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
149 void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
150 void pumpPendingSpeculations();
152 Document* contextForParsingSession();
154 enum SynchronousMode {
158 bool canTakeNextToken(SynchronousMode, PumpSession&);
159 void pumpTokenizer(SynchronousMode);
160 void pumpTokenizerIfPossible(SynchronousMode);
161 void constructTreeFromHTMLToken(HTMLToken&);
162 void constructTreeFromCompactHTMLToken(const CompactHTMLToken&);
164 void runScriptsForPausedTreeBuilder();
165 void resumeParsingAfterScriptExecution();
169 void attemptToRunDeferredScriptsAndEnd();
// True when the parser options request threading (m_options.useThreading) and
// this parser has not been pinned to the main thread (m_isPinnedToMainThread
// is false). Both conditions must hold.
172 bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; }
174 bool isParsingFragment() const;
175 bool isScheduledForResume() const;
// True while the pump-session nesting counter (m_pumpSessionNestingLevel) is
// greater than zero.
176 bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
// True if ANY of the following holds: the parser is inside a pump session, is
// waiting for scripts, is scheduled for resume, or is executing a script.
// NOTE(review): presumably consulted before finishing the parse (see
// m_endWasDelayed) - confirm against the call sites in the .cpp.
177 bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
// Returns a reference to the current token by dereferencing m_token. There is
// no null check here, so this must only be called while m_token is set.
179 HTMLToken& token() { return *m_token; }
181 HTMLParserOptions m_options;
182 HTMLInputStream m_input;
184 OwnPtr<HTMLToken> m_token;
185 OwnPtr<HTMLTokenizer> m_tokenizer;
186 OwnPtrWillBeMember<HTMLScriptRunner> m_scriptRunner;
187 OwnPtrWillBeMember<HTMLTreeBuilder> m_treeBuilder;
188 OwnPtr<HTMLPreloadScanner> m_preloadScanner;
189 OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner;
190 OwnPtr<HTMLParserScheduler> m_parserScheduler;
191 HTMLSourceTracker m_sourceTracker;
192 TextPosition m_textPosition;
193 XSSAuditor m_xssAuditor;
194 XSSAuditorDelegate m_xssAuditorDelegate;
196 // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object
197 // so they can be set and cleared together and passed between threads together.
198 OwnPtr<ParsedChunk> m_lastChunkBeforeScript;
199 Deque<OwnPtr<ParsedChunk> > m_speculations;
200 WeakPtrFactory<HTMLDocumentParser> m_weakFactory;
201 WeakPtr<BackgroundHTMLParser> m_backgroundParser;
202 OwnPtrWillBeMember<HTMLResourcePreloader> m_preloader;
204 bool m_isPinnedToMainThread;
205 bool m_endWasDelayed;
206 bool m_haveBackgroundParser;
207 unsigned m_pumpSessionNestingLevel;