2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "core/html/parser/HTMLDocumentParser.h"
29 #include "HTMLNames.h"
30 #include "core/dom/DocumentFragment.h"
31 #include "core/dom/Element.h"
32 #include "core/frame/LocalFrame.h"
33 #include "core/html/HTMLDocument.h"
34 #include "core/html/parser/AtomicHTMLToken.h"
35 #include "core/html/parser/BackgroundHTMLParser.h"
36 #include "core/html/parser/HTMLParserScheduler.h"
37 #include "core/html/parser/HTMLParserThread.h"
38 #include "core/html/parser/HTMLScriptRunner.h"
39 #include "core/html/parser/HTMLTreeBuilder.h"
40 #include "core/inspector/InspectorInstrumentation.h"
41 #include "platform/SharedBuffer.h"
42 #include "platform/TraceEvent.h"
43 #include "wtf/Functional.h"
47 using namespace HTMLNames;
// Picks the tokenizer state a fragment parse starts in, based on the tag of
// the context element. Visible mapping:
//   title, textarea                        -> RCDATAState
//   style, xmp, iframe, noframes,
//   noembed (only if options.pluginsEnabled),
//   noscript (only if options.scriptEnabled) -> RAWTEXTState
//   script                                 -> ScriptDataState
//   plaintext                              -> PLAINTEXTState
//   anything else                          -> DataState
// When reportErrors is false, the RAWTEXT and ScriptData cases return
// PLAINTEXTState instead.
49 // This is a direct transcription of step 4 from:
50 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
51 static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors, const HTMLParserOptions& options)
// NOTE(review): this early return is presumably guarded by a null check on
// contextElement that is not visible in this view -- confirm.
54 return HTMLTokenizer::DataState;
56 const QualifiedName& contextTag = contextElement->tagQName();
58 if (contextTag.matches(titleTag) || contextTag.matches(textareaTag))
59 return HTMLTokenizer::RCDATAState;
60 if (contextTag.matches(styleTag)
61 || contextTag.matches(xmpTag)
62 || contextTag.matches(iframeTag)
63 || (contextTag.matches(noembedTag) && options.pluginsEnabled)
64 || (contextTag.matches(noscriptTag) && options.scriptEnabled)
65 || contextTag.matches(noframesTag))
66 return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState;
67 if (contextTag.matches(scriptTag))
68 return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState;
69 if (contextTag.matches(plaintextTag))
70 return HTMLTokenizer::PLAINTEXTState;
// Default for every other context element.
71 return HTMLTokenizer::DataState;
// Constructs a parser for a whole HTMLDocument.
// When m_options.useThreading is set, m_token and m_tokenizer start out null;
// otherwise they are created here. The script runner, tree builder, parser
// scheduler and resource preloader are always created. The parser starts
// unpinned from the main thread, with no background parser and no active
// pump session.
// NOTE(review): some initializers (inner lines 76, 83) are not visible in
// this view.
74 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
75 : ScriptableDocumentParser(document)
77 , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken))
78 , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options))
79 , m_scriptRunner(HTMLScriptRunner::create(document, this))
80 , m_treeBuilder(HTMLTreeBuilder::create(this, document, parserContentPolicy(), reportErrors, m_options))
81 , m_parserScheduler(HTMLParserScheduler::create(this))
82 , m_xssAuditorDelegate(document)
84 , m_preloader(adoptPtr(new HTMLResourcePreloader(document)))
85 , m_isPinnedToMainThread(false)
86 , m_endWasDelayed(false)
87 , m_haveBackgroundParser(false)
88 , m_pumpSessionNestingLevel(0)
// Either threading is in use, or both main-thread tokenizing objects exist.
90 ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
// Constructs a parser for a DocumentFragment (the fragment case).
// Unlike the document constructor, the visible initializer list always creates
// m_token and m_tokenizer, pins the parser to the main thread, and creates no
// script runner, parser scheduler or preloader. The tokenizer's initial state
// is derived from contextElement, and the XSS auditor is initialized in
// fragment mode.
93 // FIXME: Member variables should be grouped into self-initializing structs to
94 // minimize code duplication between these constructors.
95 HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
96 : ScriptableDocumentParser(&fragment->document(), parserContentPolicy)
97 , m_options(&fragment->document())
98 , m_token(adoptPtr(new HTMLToken))
99 , m_tokenizer(HTMLTokenizer::create(m_options))
// this->parserContentPolicy() is the base-class accessor; the unqualified
// name would refer to the constructor parameter of the same name.
100 , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, this->parserContentPolicy(), m_options))
101 , m_xssAuditorDelegate(&fragment->document())
102 , m_weakFactory(this)
103 , m_isPinnedToMainThread(true)
104 , m_endWasDelayed(false)
105 , m_haveBackgroundParser(false)
106 , m_pumpSessionNestingLevel(0)
108 ASSERT(!shouldUseThreading());
109 bool reportErrors = false; // For now document fragment parsing never reports errors.
110 m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
111 m_xssAuditor.initForFragment();
// Destructor. The visible body only asserts that teardown already happened:
// the scheduler and both preload scanners are cleared, no pump session is
// active, and the background parser has been stopped.
114 HTMLDocumentParser::~HTMLDocumentParser()
116 ASSERT(!m_parserScheduler);
117 ASSERT(!m_pumpSessionNestingLevel);
118 ASSERT(!m_preloadScanner);
119 ASSERT(!m_insertionPreloadScanner);
120 ASSERT(!m_haveBackgroundParser);
121 // FIXME: We should be able to ASSERT(m_speculations.isEmpty()),
122 // but there are cases where that's not true currently. For example,
123 // when we're told to stop parsing before we've consumed all the input.
// Forces this parser to tokenize on the main thread: marks it pinned and
// creates the main-thread HTMLToken and HTMLTokenizer. Per the asserts it must
// be called before a background parser exists, and at most once.
126 void HTMLDocumentParser::pinToMainThread()
128 ASSERT(!m_haveBackgroundParser);
129 ASSERT(!m_isPinnedToMainThread);
130 m_isPinnedToMainThread = true;
// NOTE(review): inner lines 131-132 are not visible; the creation below may
// be conditional on the tokenizer not existing yet -- confirm.
133 m_token = adoptPtr(new HTMLToken);
134 m_tokenizer = HTMLTokenizer::create(m_options);
// Detaches the parser from its document: stops the background parser if one
// is running, detaches the base class, script runner and tree builder, and
// drops both preload scanners and the parser scheduler.
138 void HTMLDocumentParser::detach()
140 if (m_haveBackgroundParser)
141 stopBackgroundParser();
142 DocumentParser::detach();
// NOTE(review): m_scriptRunner is not created by the fragment constructor;
// a null guard on the next call is presumably on a line not visible here.
144 m_scriptRunner->detach();
145 m_treeBuilder->detach();
146 // FIXME: It seems wrong that we would have a preload scanner here.
147 // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do.
148 m_preloadScanner.clear();
149 m_insertionPreloadScanner.clear();
150 m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
// Stops parsing: forwards to DocumentParser::stopParsing(), deletes the
// scheduler (cancelling any scheduled resume), and stops the background
// parser if one is running.
153 void HTMLDocumentParser::stopParsing()
155 DocumentParser::stopParsing();
156 m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
157 if (m_haveBackgroundParser)
158 stopBackgroundParser();
// Begins the end-of-parsing sequence. Visible steps, in order: flush the
// tokenizer synchronously, notify the base class, move the document to the
// Interactive ready state, then try to run deferred scripts and end.
// NOTE(review): several guards and early returns in this function are not
// visible in this view.
161 // This kicks off "Once the user agent stops parsing" as described by:
162 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
163 void HTMLDocumentParser::prepareToStopParsing()
165 // FIXME: It may not be correct to disable this for the background parser.
166 // That means hasInsertionPoint() may not be correct in some cases.
167 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser);
169 // pumpTokenizer can cause this parser to be detached from the Document,
170 // but we need to ensure it isn't deleted yet.
171 RefPtr<HTMLDocumentParser> protect(this);
173 // NOTE: This pump should only ever emit buffered character tokens,
174 // so ForceSynchronous vs. AllowYield should be meaningless.
// NOTE(review): the assert and pump below presumably sit inside a
// conditional (e.g. on having a main-thread tokenizer) that is not visible.
176 ASSERT(!m_haveBackgroundParser);
177 pumpTokenizerIfPossible(ForceSynchronous);
183 DocumentParser::prepareToStopParsing();
185 // We will not have a scriptRunner when parsing a DocumentFragment.
187 document()->setReadyState(Document::Interactive);
189 // Setting the ready state above can fire mutation event and detach us
190 // from underneath. In that case, just bail out.
194 attemptToRunDeferredScriptsAndEnd();
// Returns whether this parser is parsing a fragment, as reported by the tree
// builder.
197 bool HTMLDocumentParser::isParsingFragment() const
199 return m_treeBuilder->isParsingFragment();
// Returns true while the parser still has work in flight: a resume is
// scheduled, a pump session is active, or a background parser exists.
202 bool HTMLDocumentParser::processingData() const
204 return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser;
// Pumps the tokenizer only when it is currently allowed to run. The visible
// checks are: a pending parser-blocking script, and an already scheduled
// resume (which is only expected in AllowYield mode).
// NOTE(review): the early returns and the final pumpTokenizer(mode) call are
// presumably on lines not visible in this view.
207 void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
211 if (isWaitingForScripts())
214 // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
215 if (isScheduledForResume()) {
216 ASSERT(mode == AllowYield);
// Returns true if a parser scheduler exists and has a resume scheduled.
// Safe to call after the scheduler has been cleared.
223 bool HTMLDocumentParser::isScheduledForResume() const
225 return m_parserScheduler && m_parserScheduler->isScheduledForResume();
// Continues parsing after a yield. With a background parser, pending
// speculations are pumped; otherwise the main-thread tokenizer is pumped in
// AllowYield mode. Never expected on a parser pinned to the main thread.
228 // Used by HTMLParserScheduler
229 void HTMLDocumentParser::resumeParsingAfterYield()
231 ASSERT(!m_isPinnedToMainThread);
232 // pumpTokenizer can cause this parser to be detached from the Document,
233 // but we need to ensure it isn't deleted yet.
234 RefPtr<HTMLDocumentParser> protect(this);
236 if (m_haveBackgroundParser) {
237 pumpPendingSpeculations();
// NOTE(review): the background-parser branch presumably returns on a line
// not visible here, so the pump below runs only without a background parser.
241 // We should never be here unless we can pump immediately. Call pumpTokenizer()
242 // directly so that ASSERTS will fire if we're wrong.
243 pumpTokenizer(AllowYield);
// Takes the script element the tree builder paused on, together with its
// start position, and hands it to the script runner for execution.
// Requires that the parser content policy allows scripting.
247 void HTMLDocumentParser::runScriptsForPausedTreeBuilder()
249 ASSERT(scriptingContentIsAllowed(parserContentPolicy()));
// scriptStartPosition is an out-parameter filled in by takeScriptToProcess().
251 TextPosition scriptStartPosition = TextPosition::belowRangePosition();
252 RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition);
253 // We will not have a scriptRunner when parsing a DocumentFragment.
// NOTE(review): the null check on m_scriptRunner implied by the comment
// above is not visible in this view.
255 m_scriptRunner->execute(scriptElement.release(), scriptStartPosition);
// Decides whether pumpTokenizer() may take another token. Visible logic:
//  - if a script is blocking, optionally ask the scheduler whether to yield
//    (AllowYield only), then try to run the paused tree builder's script;
//  - for non-fragment parsing, check for a pending location change on the
//    frame's navigation scheduler;
//  - in AllowYield mode, ask the scheduler whether to yield before the token.
// NOTE(review): every return statement of this function is on a line not
// visible in this view, so the exact result of each branch cannot be
// confirmed here.
258 bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& session)
263 ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous);
265 if (isWaitingForScripts()) {
266 if (mode == AllowYield)
267 m_parserScheduler->checkForYieldBeforeScript(session);
269 // If we don't run the script, we cannot allow the next token to be taken.
270 if (session.needsYield)
273 // If we're paused waiting for a script, we try to execute scripts before continuing.
274 runScriptsForPausedTreeBuilder();
// Running the script may itself have left us blocked again.
277 if (isWaitingForScripts())
281 // FIXME: It's wrong for the HTMLDocumentParser to reach back to the
282 // LocalFrame, but this approach is how the old parser handled
283 // stopping when the page assigns window.location. What really
284 // should happen is that assigning window.location causes the
285 // parser to stop parsing cleanly. The problem is we're not
286 // prepared to do that at every point where we run JavaScript.
287 if (!isParsingFragment()
288 && document()->frame() && document()->frame()->navigationScheduler().locationChangePending())
291 if (mode == AllowYield)
292 m_parserScheduler->checkForYieldBeforeToken(session);
// Receives a chunk of tokens produced by the background parser.
// If the parser cannot process it right now (blocked on a script, earlier
// speculations still queued, or another parser active on the document), the
// chunk's preloads are issued and the chunk is queued as a speculation.
// Otherwise the chunk is queued and processed immediately.
297 void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> chunk)
299 TRACE_EVENT0("webkit", "HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser");
301 // alert(), runModalDialog, and the JavaScript Debugger all run nested event loops
302 // which can cause this method to be re-entered. We detect re-entry using
303 // document()->activeParserCount(), save the chunk as a speculation, and return.
304 if (isWaitingForScripts() || !m_speculations.isEmpty() || document()->activeParserCount() > 0) {
305 m_preloader->takeAndPreload(chunk->preloads);
306 m_speculations.append(chunk);
// NOTE(review): the return described by the comment above is on a line not
// visible in this view.
310 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
311 // but we need to ensure it isn't deleted yet.
312 RefPtr<HTMLDocumentParser> protect(this);
314 ASSERT(m_speculations.isEmpty());
315 chunk->preloads.clear(); // We don't need to preload because we're going to parse immediately.
316 m_speculations.append(chunk);
317 pumpPendingSpeculations();
// Forwards encoding data reported by the background parser to the document.
320 void HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData& data)
322 document()->setEncodingData(data);
// Decides whether chunks speculatively tokenized by the background parser
// are still valid after a script has run.
//  - While still blocked on a script, the chunk is stashed in
//    m_lastChunkBeforeScript for a later call.
//  - Otherwise the main-thread tokenizer and token are taken out of the
//    members. If the state matches what the background parser assumed, the
//    speculations are kept; if not, they are discarded and the background
//    parser is resumed from a checkpoint.
// NOTE(review): the early returns, and any null checks on the released
// tokenizer/token, are on lines not visible in this view.
325 void HTMLDocumentParser::validateSpeculations(PassOwnPtr<ParsedChunk> chunk)
328 if (isWaitingForScripts()) {
329 // We're waiting on a network script, just save the chunk, we'll get
330 // a second validateSpeculations call after the script completes.
331 // This call should have been made immediately after runScriptsForPausedTreeBuilder
332 // which may have started a network load and left us waiting.
333 ASSERT(!m_lastChunkBeforeScript);
334 m_lastChunkBeforeScript = chunk;
338 ASSERT(!m_lastChunkBeforeScript);
// Take ownership; m_tokenizer and m_token are null after these releases.
339 OwnPtr<HTMLTokenizer> tokenizer = m_tokenizer.release();
340 OwnPtr<HTMLToken> token = m_token.release();
343 // There must not have been any changes to the HTMLTokenizer state on
344 // the main thread, which means the speculation buffer is correct.
348 // Currently we're only smart enough to reuse the speculation buffer if the tokenizer
349 // both starts and ends in the DataState. That state is simplest because the HTMLToken
350 // is always in the Uninitialized state. We should consider whether we can reuse the
351 // speculation buffer in other states, but we'd likely need to do something more
352 // sophisticated with the HTMLToken.
353 if (chunk->tokenizerState == HTMLTokenizer::DataState
354 && tokenizer->state() == HTMLTokenizer::DataState
355 && m_input.current().isEmpty()
356 && chunk->treeBuilderState == HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get())) {
357 ASSERT(token->isUninitialized());
// Speculation could not be validated: throw it away and restart the
// background parser from this chunk's checkpoint.
361 discardSpeculationsAndResumeFrom(chunk, token.release(), tokenizer.release());
// Throws away all queued speculations and restarts the background parser
// from a checkpoint. The checkpoint bundles the main-thread token and
// tokenizer, the current tree builder state, the input and preload-scanner
// checkpoints of lastChunkBeforeScript, and any unparsed main-thread input.
364 void HTMLDocumentParser::discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunkBeforeScript, PassOwnPtr<HTMLToken> token, PassOwnPtr<HTMLTokenizer> tokenizer)
// Revoke outstanding weak pointers to this parser; a fresh one is created
// for the checkpoint below.
366 m_weakFactory.revokeAll();
367 m_speculations.clear();
369 OwnPtr<BackgroundHTMLParser::Checkpoint> checkpoint = adoptPtr(new BackgroundHTMLParser::Checkpoint);
370 checkpoint->parser = m_weakFactory.createWeakPtr();
371 checkpoint->token = token;
372 checkpoint->tokenizer = tokenizer;
373 checkpoint->treeBuilderState = HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get());
374 checkpoint->inputCheckpoint = lastChunkBeforeScript->inputCheckpoint;
375 checkpoint->preloadScannerCheckpoint = lastChunkBeforeScript->preloadScannerCheckpoint;
// isolatedCopy() makes the string safe to hand to the parser thread
// (asserted below).
376 checkpoint->unparsedInput = m_input.current().toString().isolatedCopy();
377 m_input.current().clear(); // FIXME: This should be passed in instead of cleared.
379 ASSERT(checkpoint->unparsedInput.isSafeToSendToAnotherThread());
380 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::resumeFrom, m_backgroundParser, checkpoint.release()));
// Feeds one chunk of background-parsed tokens into the tree builder.
// Visible steps: tell the background parser which input checkpoint this
// chunk started from, report XSS auditor results carried by the chunk, then
// construct the tree token by token. The loop handles three special cases:
// a pending location change, a parser-blocking script, and end of file.
// NOTE(review): the break/return statements that leave the loop are on lines
// not visible in this view.
383 void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> popChunk)
385 TRACE_EVENT0("webkit", "HTMLDocumentParser::processParsedChunkFromBackgroundParser");
387 ASSERT_WITH_SECURITY_IMPLICATION(!document()->activeParserCount());
388 ASSERT(!isParsingFragment());
389 ASSERT(!isWaitingForScripts());
390 ASSERT(!isStopped());
391 // ASSERT that this object is both attached to the Document and protected.
392 ASSERT(refCount() >= 2);
393 ASSERT(shouldUseThreading());
394 ASSERT(!m_tokenizer);
396 ASSERT(!m_lastChunkBeforeScript);
398 ActiveParserSession session(contextForParsingSession());
// Take ownership of the chunk and detach its token stream.
400 OwnPtr<ParsedChunk> chunk(popChunk);
401 OwnPtr<CompactHTMLTokenStream> tokens = chunk->tokens.release();
403 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::startedChunkWithCheckpoint, m_backgroundParser, chunk->inputCheckpoint));
// Report scripts the background XSS auditor blocked, at their positions.
405 for (XSSInfoStream::const_iterator it = chunk->xssInfos.begin(); it != chunk->xssInfos.end(); ++it) {
406 m_textPosition = (*it)->m_textPosition;
407 m_xssAuditorDelegate.didBlockScript(**it);
412 for (Vector<CompactHTMLToken>::const_iterator it = tokens->begin(); it != tokens->end(); ++it) {
413 ASSERT(!isWaitingForScripts());
415 if (!isParsingFragment()
416 && document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) {
418 // To match main-thread parser behavior (which never checks locationChangePending on the EOF path)
419 // we peek to see if this chunk has an EOF and process it anyway.
420 if (tokens->last().type() == HTMLToken::EndOfFile) {
421 ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF.
422 prepareToStopParsing();
427 m_textPosition = it->textPosition();
429 constructTreeFromCompactHTMLToken(*it);
// The token just processed left a parser-blocking script pending: run it,
// then check whether the remaining speculations are still valid.
434 if (isWaitingForScripts()) {
435 ASSERT(it + 1 == tokens->end()); // The </script> is assumed to be the last token of this bunch.
436 runScriptsForPausedTreeBuilder();
437 validateSpeculations(chunk.release());
441 if (it->type() == HTMLToken::EndOfFile) {
442 ASSERT(it + 1 == tokens->end()); // The EOF is assumed to be the last token of this bunch.
443 ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF.
444 prepareToStopParsing();
448 ASSERT(!m_tokenizer);
// Processes queued speculation chunks in order until the queue is empty, the
// parser blocks on a script, or the time budget (parserTimeLimit seconds) is
// exceeded with chunks still queued. In the last case a resume is scheduled
// with the parser scheduler. The work is bracketed by inspector
// willWriteHTML/didWriteHTML notifications.
453 void HTMLDocumentParser::pumpPendingSpeculations()
455 // FIXME: Share this constant with the parser scheduler.
456 const double parserTimeLimit = 0.500;
458 // ASSERT that this object is both attached to the Document and protected.
459 ASSERT(refCount() >= 2);
460 // If this assert fails, you need to call validateSpeculations to make sure
461 // m_tokenizer and m_token don't have state that invalidates m_speculations.
462 ASSERT(!m_tokenizer);
464 ASSERT(!m_lastChunkBeforeScript);
465 ASSERT(!isWaitingForScripts());
466 ASSERT(!isStopped());
468 // FIXME: Pass in current input length.
469 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), lineNumber().zeroBasedInt());
471 double startTime = currentTime();
473 while (!m_speculations.isEmpty()) {
474 processParsedChunkFromBackgroundParser(m_speculations.takeFirst());
476 // The order matters! If this isStopped(), isWaitingForScripts() can hit and ASSERT since
477 // m_document can be null which is used to decide the readiness.
// NOTE(review): the isStopped() check the comment above refers to, and the
// loop exits for the conditions below, are on lines not visible here.
480 if (isWaitingForScripts())
483 if (currentTime() - startTime > parserTimeLimit && !m_speculations.isEmpty()) {
484 m_parserScheduler->scheduleForResume();
489 InspectorInstrumentation::didWriteHTML(cookie, lineNumber().zeroBasedInt());
// Puts the tokenizer into PLAINTEXT state for text documents. With threading
// the request is posted to the background parser (starting it first if
// needed); without threading the main-thread tokenizer is set directly.
492 void HTMLDocumentParser::forcePlaintextForTextDocument()
494 if (shouldUseThreading()) {
495 // This method is called before any data is appended, so we have to start
496 // the background parser ourselves.
497 if (!m_haveBackgroundParser)
498 startBackgroundParser();
500 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser));
// NOTE(review): the else branch delimiter is not visible here; the line
// below is presumably the non-threaded path only.
502 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
// Returns the Document a parsing session should be tied to.
// NOTE(review): both return statements are on lines not visible in this
// view. Per the comment below, the fragment case presumably yields no
// document and the normal case yields document() -- confirm.
505 Document* HTMLDocumentParser::contextForParsingSession()
507 // The parsing session should interact with the document only when parsing
508 // non-fragments. Otherwise, we might delay the load event mistakenly.
509 if (isParsingFragment())
// Main-thread tokenizing loop. While canTakeNextToken() allows it and no
// yield is requested, it pulls the next token from m_input and feeds it to
// the tree builder. For non-fragment parsing each token is also tracked by
// the source tracker and passed through the XSS auditor. After the loop it
// may flush the tree builder, schedule a resume, and run the preload scanner
// over the remaining input while blocked on a script.
// NOTE(review): the loop break and early returns are on lines not visible in
// this view.
514 void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
516 ASSERT(!isStopped());
517 ASSERT(!isScheduledForResume());
518 // ASSERT that this object is both attached to the Document and protected.
519 ASSERT(refCount() >= 2);
522 ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous);
524 PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession());
526 // We tell the InspectorInstrumentation about every pump, even if we
527 // end up pumping nothing. It can filter out empty pumps itself.
528 // FIXME: m_input.current().length() is only accurate if we
529 // end up parsing the whole buffer in this pump. We should pass how
530 // much we parsed as part of didWriteHTML instead of willWriteHTML.
531 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().currentLine().zeroBasedInt());
533 m_xssAuditor.init(document(), &m_xssAuditorDelegate);
535 while (canTakeNextToken(mode, session) && !session.needsYield) {
536 if (!isParsingFragment())
537 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), token());
// nextToken() returning false means no complete token is available yet.
539 if (!m_tokenizer->nextToken(m_input.current(), token()))
542 if (!isParsingFragment()) {
543 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), token());
545 // We do not XSS filter innerHTML, which means we (intentionally) fail
546 // http/tests/security/xssAuditor/dom-write-innerHTML.html
547 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(token(), m_sourceTracker, m_tokenizer->shouldAllowCDATA())))
548 m_xssAuditorDelegate.didBlockScript(*xssInfo);
551 constructTreeFromHTMLToken(token());
552 ASSERT(token().isUninitialized());
555 // Ensure we haven't been totally deref'ed after pumping. Any caller of this
556 // function should be holding a RefPtr to this to ensure we weren't deleted.
557 ASSERT(refCount() >= 1);
562 // There should only be PendingText left since the tree-builder always flushes
563 // the task queue before returning. In case that ever changes, crash.
564 if (mode == ForceSynchronous)
565 m_treeBuilder->flush();
566 RELEASE_ASSERT(!isStopped());
568 if (session.needsYield)
569 m_parserScheduler->scheduleForResume();
// While blocked on a script, scan the unparsed input for resources to
// preload. The scanner is created lazily and seeded with the current input.
571 if (isWaitingForScripts()) {
572 ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
573 if (!m_preloadScanner) {
574 m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), document()->devicePixelRatio()));
575 m_preloadScanner->appendToEnd(m_input.current());
577 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
580 InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt());
// Converts a raw HTMLToken into an AtomicHTMLToken and hands it to the tree
// builder. Character tokens get special treatment (explained below) because
// the atomic token points into the raw token's buffer.
// NOTE(review): the rawToken.clear() calls implied by the comments are on
// lines not visible in this view.
583 void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken)
585 AtomicHTMLToken token(rawToken);
587 // We clear the rawToken in case constructTreeFromAtomicToken
588 // synchronously re-enters the parser. We don't clear the token immediately
589 // for Character tokens because the AtomicHTMLToken avoids copying the
590 // characters by keeping a pointer to the underlying buffer in the
591 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
594 // FIXME: Stop clearing the rawToken once we start running the parser off
595 // the main thread or once we stop allowing synchronous JavaScript
596 // execution from parseAttribute.
597 if (rawToken.type() != HTMLToken::Character)
600 m_treeBuilder->constructTree(&token);
// Only a Character token can still be initialized at this point.
602 if (!rawToken.isUninitialized()) {
603 ASSERT(rawToken.type() == HTMLToken::Character);
// Converts a CompactHTMLToken (as produced by the background parser) into an
// AtomicHTMLToken and hands it to the tree builder.
608 void HTMLDocumentParser::constructTreeFromCompactHTMLToken(const CompactHTMLToken& compactToken)
610 AtomicHTMLToken token(compactToken);
611 m_treeBuilder->constructTree(&token);
// Returns true if the input stream has an insertion point, or if the parser
// was created by script and has not yet seen end of file.
614 bool HTMLDocumentParser::hasInsertionPoint()
616 // FIXME: The wasCreatedByScript() branch here might not be fully correct.
617 // Our model of the EOF character differs slightly from the one in
618 // the spec because our treatment is uniform between network-sourced
619 // and script-sourced input streams whereas the spec treats them
621 return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile());
// Inserts source at the current insertion point and parses it synchronously
// (parseDocumentFragment() in this file uses it for exactly that reason).
// The inserted text is excluded from line counting. If the parser ends up
// blocked on a script, the inserted text is also run through a separate
// insertion preload scanner.
// NOTE(review): an early return and the conditional around the
// token/tokenizer creation are on lines not visible in this view.
624 void HTMLDocumentParser::insert(const SegmentedString& source)
629 TRACE_EVENT0("webkit", "HTMLDocumentParser::insert");
631 // pumpTokenizer can cause this parser to be detached from the Document,
632 // but we need to ensure it isn't deleted yet.
633 RefPtr<HTMLDocumentParser> protect(this);
// NOTE(review): the lines below presumably run only when no main-thread
// tokenizer exists yet -- confirm.
636 ASSERT(!inPumpSession());
637 ASSERT(m_haveBackgroundParser || wasCreatedByScript());
638 m_token = adoptPtr(new HTMLToken);
639 m_tokenizer = HTMLTokenizer::create(m_options);
// Copy the source so line-number exclusion does not affect the caller's
// string.
642 SegmentedString excludedLineNumberSource(source);
643 excludedLineNumberSource.setExcludeLineNumbers();
644 m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
645 pumpTokenizerIfPossible(ForceSynchronous);
647 if (isWaitingForScripts()) {
648 // Check the document.write() output with a separate preload scanner as
649 // the main scanner can't deal with insertions.
650 if (!m_insertionPreloadScanner)
651 m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), document()->devicePixelRatio()));
653 m_insertionPreloadScanner->appendToEnd(source);
654 m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL());
// Starts the background parser. It creates an unbound weak reference that
// the parser thread will bind, then builds a Configuration holding the
// options, a weak pointer back to this parser, an XSS auditor, a token
// preload scanner and the decoder taken from this parser. Finally it posts
// BackgroundHTMLParser::start to the shared parser thread.
660 void HTMLDocumentParser::startBackgroundParser()
662 ASSERT(!isStopped());
663 ASSERT(shouldUseThreading());
664 ASSERT(!m_haveBackgroundParser);
665 m_haveBackgroundParser = true;
667 RefPtr<WeakReference<BackgroundHTMLParser> > reference = WeakReference<BackgroundHTMLParser>::createUnbound();
668 m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference);
670 OwnPtr<BackgroundHTMLParser::Configuration> config = adoptPtr(new BackgroundHTMLParser::Configuration);
671 config->options = m_options;
672 config->parser = m_weakFactory.createWeakPtr();
673 config->xssAuditor = adoptPtr(new XSSAuditor);
674 config->xssAuditor->init(document(), &m_xssAuditorDelegate);
// The URL is copied so the scanner can be used on another thread (asserted
// below).
675 config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy(), document()->devicePixelRatio()));
676 config->decoder = takeDecoder();
678 ASSERT(config->xssAuditor->isSafeToSendToAnotherThread());
679 ASSERT(config->preloadScanner->isSafeToSendToAnotherThread());
680 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::start, reference.release(), config.release()));
// Stops the background parser: clears the flag, posts
// BackgroundHTMLParser::stop to the parser thread, and revokes all weak
// pointers to this parser handed out by m_weakFactory.
683 void HTMLDocumentParser::stopBackgroundParser()
685 ASSERT(shouldUseThreading());
686 ASSERT(m_haveBackgroundParser);
687 m_haveBackgroundParser = false;
689 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::stop, m_backgroundParser));
690 m_weakFactory.revokeAll();
// Appends decoded network data to the input stream on the non-threaded
// path, keeps the preload scanner in sync, and pumps the tokenizer. The pump
// is synchronous when pinned to the main thread and yielding otherwise.
// NOTE(review): an early return near the top, the else branches, and the
// return inside the inPumpSession() block are on lines not visible in this
// view.
693 void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
698 // We should never reach this point if we're using a parser thread,
699 // as appendBytes() will directly ship the data to the thread.
700 ASSERT(!shouldUseThreading());
702 // pumpTokenizer can cause this parser to be detached from the Document,
703 // but we need to ensure it isn't deleted yet.
704 RefPtr<HTMLDocumentParser> protect(this);
705 TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length());
706 String source(inputSource);
708 if (m_preloadScanner) {
709 if (m_input.current().isEmpty() && !isWaitingForScripts()) {
710 // We have parsed until the end of the current input and so are now moving ahead of the preload scanner.
711 // Clear the scanner so we know to scan starting from the current input point if we block again.
712 m_preloadScanner.clear();
// NOTE(review): the lines below presumably form the else branch (scanner
// kept); they would dereference a cleared scanner otherwise -- confirm.
714 m_preloadScanner->appendToEnd(source);
715 if (isWaitingForScripts())
716 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
720 m_input.appendToEnd(source);
722 if (inPumpSession()) {
723 // We've gotten data off the network in a nested write.
724 // We don't want to consume any more of the input stream now. Do
725 // not worry. We'll consume this data in a less-nested write().
729 // A couple pinToMainThread() callers require synchronous parsing, but can't
730 // easily use the insert() method, so we hack append() for them to be synchronous.
731 // javascript: url handling is one such caller.
732 // FIXME: This is gross, and we should separate the concept of synchronous parsing
733 // from insert() so that only document.write() uses insert.
734 if (m_isPinnedToMainThread)
735 pumpTokenizerIfPossible(ForceSynchronous);
737 pumpTokenizerIfPossible(AllowYield);
// Finishes parsing: stops the background parser if one is running and tells
// the tree builder that parsing is finished. Must not be called when
// detached or while a resume is scheduled.
742 void HTMLDocumentParser::end()
744 ASSERT(!isDetached());
745 ASSERT(!isScheduledForResume());
747 if (m_haveBackgroundParser)
748 stopBackgroundParser();
750 // Informs the rest of WebCore that parsing is really finished (and deletes this).
751 m_treeBuilder->finished();
// Runs scripts that were waiting for parsing to finish, then ends parsing.
// NOTE(review): the body of the final if (presumably an early return when
// scripts are still pending) and the closing end() call are on lines not
// visible in this view.
754 void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd()
756 ASSERT(isStopping());
757 // FIXME: It may not be correct to disable this for the background parser.
758 // That means hasInsertionPoint() may not be correct in some cases.
759 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser);
760 if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing())
// Tries to end parsing. If shouldDelayEnd() says the end must wait, the
// delay is recorded in m_endWasDelayed so that endIfDelayed() can finish
// later; otherwise prepareToStopParsing() starts the shutdown sequence.
// NOTE(review): the return inside the delay branch is on a line not visible
// in this view.
765 void HTMLDocumentParser::attemptToEnd()
767 // finish() indicates we will not receive any more data. If we are waiting on
768 // an external script to load, we can't finish parsing quite yet.
770 if (shouldDelayEnd()) {
771 m_endWasDelayed = true;
774 prepareToStopParsing();
// Completes an end that attemptToEnd() had to postpone. Does nothing unless
// an end was delayed and the delay condition no longer holds.
// NOTE(review): the detached check and both early returns are on lines not
// visible in this view.
777 void HTMLDocumentParser::endIfDelayed()
779 // If we've already been detached, don't bother ending.
783 if (!m_endWasDelayed || shouldDelayEnd())
786 m_endWasDelayed = false;
787 prepareToStopParsing();
// Signals that no more data will arrive.
//  - With a background parser: closes the input stream without marking EOF
//    and posts BackgroundHTMLParser::finish to the parser thread.
//  - Without one: creates a main-thread token and tokenizer if needed, marks
//    end of file on the input stream, and proceeds to end parsing.
// NOTE(review): the return after the background-parser branch, the condition
// guarding the token/tokenizer creation, and the trailing call that attempts
// to end are on lines not visible in this view.
790 void HTMLDocumentParser::finish()
792 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
793 // make sense to call any methods on DocumentParser once it's been stopped.
794 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
796 // Empty documents never got an append() call, and thus have never started
797 // a background parser. In those cases, we ignore shouldUseThreading()
798 // and fall through to the non-threading case.
799 if (m_haveBackgroundParser) {
800 if (!m_input.haveSeenEndOfFile())
801 m_input.closeWithoutMarkingEndOfFile();
802 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finish, m_backgroundParser));
808 // We're finishing before receiving any data. Rather than booting up
809 // the background parser just to spin it down, we finish parsing
811 m_token = adoptPtr(new HTMLToken);
812 m_tokenizer = HTMLTokenizer::create(m_options);
815 // We're not going to get any more data off the network, so we tell the
816 // input stream we've reached the end of file. finish() can be called more
817 // than once, if the first time does not call end().
818 if (!m_input.haveSeenEndOfFile())
819 m_input.markEndOfFile();
// Returns whether the script runner is currently executing a script.
// NOTE(review): a guard for a missing m_scriptRunner (the fragment case) is
// presumably on lines not visible in this view.
824 bool HTMLDocumentParser::isExecutingScript() const
828 return m_scriptRunner->isExecutingScript();
// Returns the current line. With a background parser this is the line of the
// last recorded m_textPosition; otherwise it is the current line of the
// main-thread input stream.
831 OrdinalNumber HTMLDocumentParser::lineNumber() const
833 if (m_haveBackgroundParser)
834 return m_textPosition.m_line;
836 return m_input.current().currentLine();
// Returns the current text position. With a background parser this is the
// last recorded m_textPosition; otherwise it is built from the current line
// and column of the main-thread input stream.
839 TextPosition HTMLDocumentParser::textPosition() const
841 if (m_haveBackgroundParser)
842 return m_textPosition;
844 const SegmentedString& currentString = m_input.current();
845 OrdinalNumber line = currentString.currentLine();
846 OrdinalNumber column = currentString.currentColumn();
848 return TextPosition(line, column);
// Returns true if a parser-blocking script is pending in either the tree
// builder or the script runner. The script runner may be null, in which case
// only the tree builder is consulted.
851 bool HTMLDocumentParser::isWaitingForScripts() const
853 // When the TreeBuilder encounters a </script> tag, it returns to the HTMLDocumentParser
854 // where the script is transferred from the treebuilder to the script runner.
855 // The script runner will hold the script until it's loaded and run. During
856 // any of this time, we want to count ourselves as "waiting for a script" and thus
857 // run the preload scanner, as well as delay completion of parsing.
858 bool treeBuilderHasBlockingScript = m_treeBuilder->hasParserBlockingScript();
859 bool scriptRunnerHasBlockingScript = m_scriptRunner && m_scriptRunner->hasParserBlockingScript();
860 // Since the parser is paused while a script runner has a blocking script, it should
861 // never be possible to end up with both objects holding a blocking script.
862 ASSERT(!(treeBuilderHasBlockingScript && scriptRunnerHasBlockingScript));
863 // If either object has a blocking script, the parser should be paused.
864 return treeBuilderHasBlockingScript || scriptRunnerHasBlockingScript;
// Resumes parsing once no script is executing or blocking the parser.
// With a background parser, the chunk stashed before the script is validated
// and pending speculations are pumped. Otherwise the insertion preload
// scanner is dropped and the main-thread tokenizer is pumped in AllowYield
// mode.
// NOTE(review): the return that ends the background-parser branch is on a
// line not visible in this view.
867 void HTMLDocumentParser::resumeParsingAfterScriptExecution()
869 ASSERT(!isExecutingScript());
870 ASSERT(!isWaitingForScripts());
872 if (m_haveBackgroundParser) {
873 validateSpeculations(m_lastChunkBeforeScript.release());
874 ASSERT(!m_lastChunkBeforeScript);
875 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
876 // but we need to ensure it isn't deleted yet.
877 RefPtr<HTMLDocumentParser> protect(this);
878 pumpPendingSpeculations();
882 m_insertionPreloadScanner.clear();
883 pumpTokenizerIfPossible(AllowYield);
// Registers this parser as a client of resource so it is notified when the
// load finishes. The resource must not already be loaded.
887 void HTMLDocumentParser::watchForLoad(Resource* resource)
889 ASSERT(!resource->isLoaded());
890 // addClient would call notifyFinished if the load were complete.
891 // Callers do not expect to be re-entered from this call, so they should
892 // not pass an already-loaded Resource.
893 resource->addClient(this);
// Unregisters this parser as a client of resource (the counterpart of
// watchForLoad()).
896 void HTMLDocumentParser::stopWatchingForLoad(Resource* resource)
898 resource->removeClient(this);
// Feeds the current unparsed input to the existing preload scanner and runs
// a scan. Requires that m_preloadScanner has already been created.
901 void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan()
903 ASSERT(m_preloadScanner);
904 m_preloadScanner->appendToEnd(m_input.current());
905 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
// Resource-load callback for a resource this parser was watching. Visible
// behaviour: either continue the shutdown sequence via
// attemptToRunDeferredScriptsAndEnd(), or execute scripts waiting on the
// loaded resource and resume parsing if nothing blocks the parser any more.
// NOTE(review): the condition selecting the first path (presumably
// isStopping()) and its return are on lines not visible in this view.
908 void HTMLDocumentParser::notifyFinished(Resource* cachedResource)
910 // pumpTokenizer can cause this parser to be detached from the Document,
911 // but we need to ensure it isn't deleted yet.
912 RefPtr<HTMLDocumentParser> protect(this);
914 ASSERT(m_scriptRunner);
915 ASSERT(!isExecutingScript());
917 attemptToRunDeferredScriptsAndEnd();
921 m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
922 if (!isWaitingForScripts())
923 resumeParsingAfterScriptExecution();
// Runs scripts that were blocked waiting on resources (per the comment
// below, stylesheet loads) and resumes parsing if nothing blocks the parser
// afterwards. Calls are ignored when the script runner has no such scripts.
// NOTE(review): the early return for the ignored case is on a line not
// visible in this view.
926 void HTMLDocumentParser::executeScriptsWaitingForResources()
928 // Document only calls this when the Document owns the DocumentParser
929 // so this will not be called in the DocumentFragment case.
930 ASSERT(m_scriptRunner);
931 // Ignore calls unless we have a script blocking the parser waiting on a
932 // stylesheet load. Otherwise we are currently parsing and this
933 // is a re-entrant call from encountering a </ style> tag.
934 if (!m_scriptRunner->hasScriptsWaitingForResources())
937 // pumpTokenizer can cause this parser to be detached from the Document,
938 // but we need to ensure it isn't deleted yet.
939 RefPtr<HTMLDocumentParser> protect(this);
940 m_scriptRunner->executeScriptsWaitingForResources();
941 if (!isWaitingForScripts())
942 resumeParsingAfterScriptExecution();
// Parses source into fragment using a temporary fragment parser built for
// contextElement and parserContentPolicy. The text is fed through insert()
// so parsing completes synchronously, and the parser is detached before it
// is released.
// NOTE(review): a finish() call between insert() and the assert is
// presumably on a line not visible in this view.
945 void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
947 RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
948 parser->insert(source); // Use insert() so that the parser will not yield.
950 ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151>
951 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
// Suspends the parser scheduler, if one exists.
954 void HTMLDocumentParser::suspendScheduledTasks()
956 if (m_parserScheduler)
957 m_parserScheduler->suspend();
// Resumes the parser scheduler, if one exists.
960 void HTMLDocumentParser::resumeScheduledTasks()
962 if (m_parserScheduler)
963 m_parserScheduler->resume();
// Receives raw bytes from the network. With threading, the bytes are copied
// into a heap buffer and posted to the background parser, which is started
// on first use. Without threading they go to
// DecodedDataDocumentParser::appendBytes(). Empty input and a stopped parser
// are checked up front.
// NOTE(review): the returns after the initial check and after the threaded
// branch are on lines not visible in this view.
966 void HTMLDocumentParser::appendBytes(const char* data, size_t length)
968 if (!length || isStopped())
971 if (shouldUseThreading()) {
972 if (!m_haveBackgroundParser)
973 startBackgroundParser();
// Copy the data so the buffer's ownership can be handed to the posted task.
975 OwnPtr<Vector<char> > buffer = adoptPtr(new Vector<char>(length));
976 memcpy(buffer->data(), data, length);
977 TRACE_EVENT1("net", "HTMLDocumentParser::appendBytes", "size", (unsigned)length);
979 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::appendBytes, m_backgroundParser, buffer.release()));
983 DecodedDataDocumentParser::appendBytes(data, length);
// Flushes buffered decoder data: posted to the background parser when one is
// running, otherwise handled by DecodedDataDocumentParser::flush(). A
// detached parser, or one that never received a decoder, is checked up
// front.
// NOTE(review): the early return and the else keyword are on lines not
// visible in this view.
986 void HTMLDocumentParser::flush()
988 // If we've got no decoder, we never received any data.
989 if (isDetached() || needsDecoder())
992 if (m_haveBackgroundParser)
993 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::flush, m_backgroundParser));
995 DecodedDataDocumentParser::flush();
// Installs decoder via the base class. If a background parser is running,
// the decoder is then taken back out with takeDecoder() and posted to it.
// NOTE(review): this is the last block visible in this view and may be
// truncated.
998 void HTMLDocumentParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder)
1001 DecodedDataDocumentParser::setDecoder(decoder);
1003 if (m_haveBackgroundParser)
1004 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::setDecoder, m_backgroundParser, takeDecoder()));